diff --git a/.gitignore b/.gitignore
index 7591726..8b262b0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,3 +18,4 @@ test_*.py
.planning/
.ruff_cache/
Start_MCP_Server.bat
+
diff --git a/src/teradata_mcp_server/app.py b/src/teradata_mcp_server/app.py
index aab1366..c9d0c8d 100644
--- a/src/teradata_mcp_server/app.py
+++ b/src/teradata_mcp_server/app.py
@@ -34,6 +34,7 @@
from teradata_mcp_server.config import Settings
from teradata_mcp_server.middleware import RequestContextMiddleware
from teradata_mcp_server.tools import ContextCatalog
+from teradata_mcp_server.tools.graph.graph_edge_contract import GRAPH_EDGE_CONTRACT
from teradata_mcp_server.tools.utils import (
convert_tdml_docstring_to_mcp_docstring,
execute_analytic_function,
@@ -1287,5 +1288,19 @@ def get_glossary_term(term_name: str) -> dict[str, Any]:
else:
return {"error": f"Glossary term not found: {term_name}"}
+ # ── Graph Edge Contract Resource ──────────────────────────────────────
+    # Registered only when a pattern under the config's "resource" key matches
+    # "graph_edge_contract"; the content itself is static (no YAML dependency).
+ # AI agents retrieve this to understand the edge_repository schema
+ # required by all graph_* tools.
+ # ──────────────────────────────────────────────────────────────────────
+ if any(re.match(pattern, "graph_edge_contract") for pattern in config.get("resource", [])):
+
+ @mcp.resource("graph://edge-contract")
+ def get_graph_edge_contract() -> str:
+ """Return the Graph Edge Contract schema definition."""
+ return GRAPH_EDGE_CONTRACT
+
+ logger.info("Registered resource: graph_edge_contract")
+
# Return the configured app and some handles used by the entrypoint if needed
return mcp, logger
diff --git a/src/teradata_mcp_server/config/profiles.yml b/src/teradata_mcp_server/config/profiles.yml
index 4141766..59ce35f 100644
--- a/src/teradata_mcp_server/config/profiles.yml
+++ b/src/teradata_mcp_server/config/profiles.yml
@@ -44,7 +44,7 @@ eda:
- "base_(?!(writeQuery|dynamicQuery)$).*"
- qlty_.*
- sec_userDbPermissions
-
+
bar:
tool:
- ^bar_*
@@ -60,4 +60,13 @@ llmUser:
- ^base_*
- ^chat_*
prompt:
- - ^chat_*
\ No newline at end of file
+ - ^chat_*
+
+graph:
+ tool:
+ - ^graph_.*
+ prompt:
+ - ^graph_.*
+ resource:
+ - ^graph_edge_contract$
+
diff --git a/src/teradata_mcp_server/tools/graph/README.md b/src/teradata_mcp_server/tools/graph/README.md
new file mode 100644
index 0000000..16fa96c
--- /dev/null
+++ b/src/teradata_mcp_server/tools/graph/README.md
@@ -0,0 +1,669 @@
+# Graph Dependency Analysis Tools
+
+**Version:** 3.0
+**Last Updated:** 2026-04-10
+**Purpose:** Directed dependency graph analysis for Teradata object lineage
+
+This package provides seven complementary tools for analysing object dependencies
+in Teradata. All tools are stored-procedure-free — the only Teradata privilege
+required is `SELECT` on an edge repository conforming to the
+[Graph Edge Contract](#graph-edge-contract).
+
+---
+
+## Quick Start
+
+```python
+# Step 0 — Generate an edge repository (once, if you don't have one)
+# For AI-Native Data Products, skip this — use lineage_graph directly:
+# edge_repository="{ProductName}_Semantic.lineage_graph"
+ddl = handle_graph_edgeContractDDL(
+ conn=connection,
+ target_database="MY_PROJECT_Semantic",
+ object_name="EdgeRepository",
+ output_type="TABLE"
+)
+
+# Step 1 — Find root objects (seed points for analysis)
+roots = handle_graph_findRootObjects(
+ conn=connection,
+ container_pattern="%MY_PROJECT%",
+ object_types="Table",
+ edge_repository="MY_PROJECT_Semantic.EdgeRepository"
+)
+
+# Step 2 — Compute BFS hop distances and group into migration waves
+waves = handle_graph_bfsLevels(
+ conn=connection,
+ root_node_list="MY_DB_STD_T.source_table_a,MY_DB_STD_T.source_table_b",
+ include_containers="MY_DB%",
+ edge_repository="MY_PROJECT_Semantic.EdgeRepository"
+)
+
+# Objects grouped by nearest_root = migration wave grouping
+# Objects ordered by downstream_level = deployment sequence within each wave
+
+# Step 3 — Trace lineage and impact paths for a specific object
+lineage = handle_graph_traceLineage(
+ conn=connection,
+ object_name="MY_DB_STD_T.source_table_a",
+ max_depth_down=5,
+ edge_repository="MY_PROJECT_Semantic.EdgeRepository"
+)
+```
+
+---
+
+## Tools
+
+Seven complementary tools covering the full graph analysis workflow:
+
+| Step | Tool | Implementation | Purpose |
+|------|------|---------------|---------|
+| 0 | [`graph_edgeContractDDL`](#graph_edgecontractddl) | Template | Generate edge repository DDL — start here |
+| 1 | [`graph_findRootObjects`](#graph_findrootobjects) | SQL | Discover objects with no upstream dependencies |
+| 2 | [`graph_bfsLevels`](#graph_bfslevels) | Python BFS | Wave planning, deployment sequencing, blast-radius sizing |
+| 3 | [`graph_traceLineage`](#graph_tracelineage) | Python + recursive CTE | Full lineage tracing, impact path analysis, edge detail |
+| 4 | [`graph_detectCycles`](#graph_detectcycles) | Python DFS | Circular reference detection, DAG validation |
+| 5 | [`graph_connectedComponents`](#graph_connectedcomponents) | Python Union-Find | Graph partitioning, isolated sub-graph identification |
+| 6 | [`graph_analyseDatabase`](#graph_analysedatabase) | Composite | All four analyses in one call, one shared edge fetch |
+
+**Typical workflow:** `edgeContractDDL` → `findRootObjects` → `bfsLevels` → `traceLineage` → `detectCycles`
+
+**When to use `graph_analyseDatabase`:** if you need two or more of the individual analyses (root discovery, connected components, cycle detection, BFS waves), use this instead — it fetches the edge set once and shares it across all four analyses in a single MCP response.
+
+---
+
+## Graph Edge Contract
+
+All tools require an **edge repository** — a Teradata table or view conforming to the Graph Edge Contract. The contract defines six required columns and two optional enrichment columns:
+
+### Required Columns
+
+| Column | Type | Description |
+|--------|------|-------------|
+| `Src_Container_Name` | `VARCHAR(128) NOT NULL` | Source (upstream) container — Teradata database name, ETL workflow folder, dbt project, etc. |
+| `Src_Object_Name` | `VARCHAR(128) NOT NULL` | Source object name |
+| `Src_Kind` | `VARCHAR(30) NOT NULL` | Source object type (e.g. `Table`, `View`, `Job`) |
+| `Tgt_Container_Name` | `VARCHAR(128) NOT NULL` | Target (downstream) container |
+| `Tgt_Object_Name` | `VARCHAR(128) NOT NULL` | Target object name |
+| `Tgt_Kind` | `VARCHAR(30) NOT NULL` | Target object type |
+
+### Optional Enrichment Columns
+
+| Column | Type | Description |
+|--------|------|-------------|
+| `Edge_Relationship` | `VARCHAR(50)` | Nature of the edge: `DIRECT`, `ETL_INPUT`, `ETL_OUTPUT`, `JOIN`, `TRANSFORM` |
+| `Transformation_Type` | `VARCHAR(50)` | Process category: `ETL`, `FEATURE_ENG`, `AGGREGATION`, `EMBEDDING_GEN` |
+
+Optional columns are ignored by graph analysis tools but surfaced to graph visualisation clients for edge labelling.
+
+### Edge Semantics
+
+All edges share a single consistent direction — Src is always upstream, Tgt is always downstream. The `Edge_Relationship` optional column carries the semantic label for visualisation clients; the graph analysis tools traverse all edges identically regardless of label.
+
+The same Src→Tgt direction is read differently depending on edge type:
+
+| Edge type | How to read it | Example |
+|---|---|---|
+| Object dependency | Src *is referenced by* Tgt | `CUSTOMER_TABLE` → `CUSTOMER_VIEW` |
+| ETL input | Src *is read by* Tgt | `CUSTOMER_TABLE` → `ETL_LOAD_JOB` |
+| ETL output | Src *writes to* Tgt | `ETL_LOAD_JOB` → `CUSTOMER_FEATURES` |
+
+In all three cases: Src is the prerequisite, Tgt is the consumer. A single edge repository can hold both object dependency edges and data lineage edges and be traversed uniformly by the graph tools.
+
+The `lineage_graph` view (Observability Module v1.5) surfaces ETL jobs as first-class nodes, producing two edges per declared flow:
+- **Leg 1:** `source_table` →*(is read by)*→ `job_name` (`Edge_Relationship = ETL_INPUT`)
+- **Leg 2:** `job_name` →*(writes to)*→ `target_table` (`Edge_Relationship = ETL_OUTPUT`)
+
+This enables end-to-end lineage traversal through jobs, not just between tables.
+
+### AI-Native Data Product Shortcut
+
+If you have a data product built on the [AI-Native Data Product standard](https://github.com/Teradata/ai-native-data-product), the `{ProductName}_Semantic.lineage_graph` view (Observability Module v1.5) already conforms to this contract. Use it directly:
+
+```python
+edge_repository="{ProductName}_Semantic.lineage_graph"
+```
+
+No DDL generation required.
+
+---
+
+## Package Structure
+
+```
+teradata_mcp_server/tools/
+├── graph_tools.py # Registration hub (imports + GRAPH_TOOLS list only)
+├── graph/
+│ ├── __init__.py # Re-exports all handle_* for ModuleLoader
+│ ├── _graph_utils.py # Shared utilities (internal — not an MCP tool)
+│ ├── graph_edge_contract.py # Tool: DDL generator + Graph Edge Contract text
+│   ├── graph_find_root_objects.py      # Tool: SQL-based root object discovery
+│   ├── graph_bfs_levels.py             # Tool: Pure-Python BFS
+│   ├── graph_trace_lineage.py          # Tool: Python + recursive CTE lineage analysis
+│   ├── graph_detect_cycles.py          # Tool: Python Union-Find + iterative DFS
+│   ├── graph_connected_components.py   # Tool: Python Union-Find WCC analysis
+│   └── graph_analyse_database.py       # Tool: Composite single-fetch analysis
+└── utils.py # Shared MCP utilities
+```
+
+`graph_tools.py` is intentionally thin — it contains no logic, only imports and the `GRAPH_TOOLS` registration list. See the comments in that file for the rationale.
+
+`_graph_utils.py` is an internal module. It is not registered as an MCP tool. It exports:
+- `parse_csv_patterns` — normalise CSV input strings
+- `build_like_or` — build single-column LIKE clauses for SQL WHERE
+- `bfs_safe_int` — safe int conversion for nullable level columns
+- `create_bfs_summary` — BFS result statistics
+- `extract_cycle_candidates` — identify direction=BOTH nodes
+
+---
+
+## Tool Reference
+
+### `graph_edgeContractDDL`
+
+Generate Teradata DDL for a Graph Edge Contract-conforming edge repository.
+
+Call this first if you don't yet have an edge repository. No database connection is used — DDL is returned as text ready to run.
+
+#### Parameters
+
+| Parameter | Type | Default | Required | Description |
+|-----------|------|---------|----------|-------------|
+| `target_database` | string | — | ✅ | Database in which to create the edge repository.<br>For AI-Native Data Products: `{ProductName}_Semantic` |
+| `object_name` | string | `EdgeRepository` | ❌ | Name for the edge table or view |
+| `output_type` | string | `TABLE` | ❌ | `TABLE`: CREATE TABLE DDL + sample DML<br>`VIEW`: customisable template for mapping an existing lineage source |
+
+#### Example
+
+```python
+# Generate a CREATE TABLE with sample DML
+result = handle_graph_edgeContractDDL(
+ conn=connection,
+ target_database="MY_PROJECT_Semantic",
+ object_name="EdgeRepository",
+ output_type="TABLE"
+)
+print(result[0]['ddl']) # Run this in Teradata
+print(result[0]['sample_dml']) # Optional: insert sample rows
+
+# Generate a VIEW template to wrap an existing lineage source
+result = handle_graph_edgeContractDDL(
+ conn=connection,
+ target_database="MY_PROJECT_Semantic",
+ object_name="lineage_graph",
+ output_type="VIEW"
+)
+```
+
+---
+
+### `graph_findRootObjects`
+
+Find objects with no upstream dependencies in specified containers.
+
+Root objects are foundational data sources that nothing else feeds into. They are the natural starting points for downstream impact analysis and migration wave planning.
+
+#### Parameters
+
+| Parameter | Type | Default | Required | Description |
+|-----------|------|---------|----------|-------------|
+| `container_pattern` | string | — | ✅ | Database/schema LIKE pattern(s). Supports `%` wildcards and CSV.<br>Examples: `MY_DB%`, `%PROJECT_A%,%PROJECT_B%` |
+| `exclude_objects` | string | `''` | ❌ | LIKE patterns to exclude. Matches `Container.Object`.<br>Example: `SANDBOX%,%.temp_%` |
+| `edge_repository` | string | — | ✅ | Edge repository conforming to the Graph Edge Contract.<br>AI-Native Data Products: `{ProductName}_Semantic.lineage_graph` |
+| `object_types` | string | `''` | ❌ | Filter by object type: `Table`, `View`, `Procedure`, `Macro`.<br>CSV supported: `Table,View`. Empty = all types. |
+| `return_format` | string | `detailed` | ❌ | `detailed` — full list with metadata<br>`summary` — statistics only |
+
+#### Use Cases
+
+| Use Case | Configuration |
+|----------|---------------|
+| Migration seed discovery | `container_pattern="%MY_PROJECT%"` |
+| Source table discovery | `object_types="Table"` |
+| Exclude sandbox schemas | `exclude_objects="SANDBOX%,%.temp_%"` |
+| Quick count | `return_format="summary"` |
+
+#### Example
+
+```python
+# Find root tables, ordered by downstream impact
+result = handle_graph_findRootObjects(
+ conn=connection,
+ container_pattern="MY_DB_STD_T,MY_DB_STD_V",
+ object_types="Table",
+ edge_repository="MY_PROJECT_Semantic.EdgeRepository"
+)
+for obj in result['results']['summary']['top_impact_objects']:
+ print(f" {obj['name']} → {obj['downstream_count']} dependents")
+```
+
+---
+
+### `graph_bfsLevels`
+
+Compute BFS shortest-path hop distances from one or more root nodes.
+
+**Implementation:** Pure Python — One SQL round-trip fetches the scoped edge set; all BFS computation runs in the MCP server process.
+
+**Use this tool for:** deployment sequencing, migration wave grouping, blast-radius sizing, cycle candidate depth analysis.
+
+**Do not use this tool for:** lineage tracing, impact path detail, edge-level analysis — use `graph_traceLineage` for those.
+
+#### Direction Convention
+
+Each edge row: `Src` "is referenced by" `Tgt` → Src is the dependency (upstream); Tgt is the dependent (downstream).
+
+| Direction | Traversal | Meaning |
+|-----------|-----------|---------|
+| Upstream BFS | Reverse adjacency (Tgt → Src) | Discovers what a node depends on |
+| Downstream BFS | Forward adjacency (Src → Tgt) | Discovers what depends on a node |
+
+When the supplied root nodes have in-degree zero, the upstream BFS finds nothing, so `upstream_level` is `None` for every non-root node. This is expected — such roots have no upstream sources.
+
+#### Parameters
+
+| Parameter | Type | Default | Required | Description |
+|-----------|------|---------|----------|-------------|
+| `root_node_list` | string | — | ✅ | CSV of exact fully-qualified node names. No wildcards.<br>Example: `MY_DB.table_a,MY_DB.table_b` |
+| `max_depth_up` | integer | `10` | ❌ | Maximum upstream hops. `0` = skip upstream analysis. |
+| `max_depth_down` | integer | `10` | ❌ | Maximum downstream hops. `0` = skip downstream analysis. |
+| `exclude_objects` | string | `''` | ❌ | CSV LIKE patterns to exclude from BFS traversal |
+| `include_containers` | string | `''` | ❌ | CSV container LIKE patterns (whitelist). Always supply when scope is known — pushed into SQL to reduce fetch volume. |
+| `edge_repository` | string | — | ✅ | Edge repository conforming to the Graph Edge Contract |
+
+#### Example
+
+```python
+# Wave planning: downstream only, scoped to project containers
+result = handle_graph_bfsLevels(
+ conn=connection,
+ root_node_list="MY_DB_STD_T.source_a,MY_DB_STD_T.source_b",
+ max_depth_up=0,
+ max_depth_down=10,
+ include_containers="MY_DB%,REPORTING%",
+ edge_repository="MY_PROJECT_Semantic.EdgeRepository"
+)
+# Sort by downstream_level ascending for deployment order
+# Group by nearest_root for wave assignment
+```
+
+---
+
+### `graph_traceLineage`
+
+Analyse object dependencies — finds upstream dependencies (what the object depends on) and downstream dependents (what depends on the object).
+
+**Implementation:** Hybrid — Python constructs Teradata recursive CTEs that execute entirely server-side. Only the reachable subgraph crosses the network.
+
+**Use this tool for:** impact analysis, lineage tracing, pre-deployment validation, edge-level dependency detail.
+
+**Do not use this tool for:** migration wave sequencing — use `graph_bfsLevels` for that.
+
+#### Parameters
+
+| Parameter | Type | Default | Required | Description |
+|-----------|------|---------|----------|-------------|
+| `object_name` | string | — | ✅ | Object name pattern(s). Supports `%` wildcards and CSV.<br>Single: `MY_DB.my_table`<br>Wildcard: `MY_DB%.%`<br>Multiple: `MY_DB_A.%,MY_DB_B.%` |
+| `max_depth_up` | integer | `3` | ❌ | Maximum upstream levels to traverse (0–10) |
+| `max_depth_down` | integer | `3` | ❌ | Maximum downstream levels to traverse (0–10) |
+| `exclude_objects` | string | `''` | ❌ | CSV LIKE patterns to exclude. Matches `DB.Object` format. |
+| `include_containers` | string | `''` | ❌ | CSV container LIKE patterns (whitelist). Empty = all containers. |
+| `edge_repository` | string | — | ✅ | Edge repository conforming to the Graph Edge Contract |
+| `return_format` | string | `detailed` | ❌ | `detailed`, `summary`, or `edges_only` |
+
+#### Example
+
+```python
+# Full impact analysis — what breaks if this object changes?
+result = handle_graph_traceLineage(
+ conn=connection,
+ object_name="MY_DB_STD_T.core_entity",
+ max_depth_up=0,
+ max_depth_down=5,
+ edge_repository="MY_PROJECT_Semantic.EdgeRepository"
+)
+print(f"Downstream dependents: {len(result['results']['downstream_edges'])}")
+```
+
+---
+
+### `graph_detectCycles`
+
+Detect circular references (cycles) in the dependency graph.
+
+**Implementation:** Pure Python — one SQL SELECT fetches the scoped edge set; Union-Find WCC partitioning followed by iterative DFS cycle detection runs in the MCP server process.
+
+Run this tool before wave planning to confirm the graph is a valid DAG. A cycle will cause topological sort to hang silently.
+
+#### Parameters
+
+| Parameter | Type | Default | Required | Description |
+|-----------|------|---------|----------|-------------|
+| `container_pattern` | string | — | ✅ | CSV LIKE patterns for container scope.<br>Example: `MY_DB%` or `%PROJECT_A%,%PROJECT_B%` |
+| `exclude_objects` | string | `''` | ❌ | CSV LIKE patterns to exclude from the scan |
+| `edge_repository` | string | — | ✅ | Edge repository conforming to the Graph Edge Contract |
+
+#### Example
+
+```python
+result = handle_graph_detectCycles(
+ conn=connection,
+ container_pattern="MY_DB%",
+ edge_repository="MY_PROJECT_Semantic.EdgeRepository"
+)
+print(result['results']['summary_stats'][0]['Summary_Message'])
+# "No cycles detected — graph is a DAG."
+# or: "3 cycle(s) detected."
+for cycle in result['results']['cycle_summaries']:
+ print(f" Cycle {cycle['Cycle_Id']}: {cycle['Cycle_Path']}")
+```
+
+---
+
+### `graph_connectedComponents`
+
+Identify all Weakly Connected Components (WCC) in the dependency graph.
+
+**Implementation:** Pure Python — one SQL SELECT, then Union-Find WCC partitioning in the MCP server process.
+
+A connected component is a maximal set of nodes reachable from one another when edge direction is ignored. Use this to understand graph structure, identify isolated sub-graphs, and pre-filter before cycle detection.
+
+#### Parameters
+
+| Parameter | Type | Default | Required | Description |
+|-----------|------|---------|----------|-------------|
+| `container_pattern` | string | — | ✅ | CSV LIKE patterns for container scope |
+| `exclude_objects` | string | `''` | ❌ | CSV LIKE patterns to exclude from the scan |
+| `edge_repository` | string | — | ✅ | Edge repository conforming to the Graph Edge Contract |
+
+#### Example
+
+```python
+result = handle_graph_connectedComponents(
+ conn=connection,
+ container_pattern="MY_DB%",
+ edge_repository="MY_PROJECT_Semantic.EdgeRepository"
+)
+stats = result['results']['summary_stats'][0]
+print(f"{stats['Component_Count']} components, "
+ f"largest has {stats['Largest_Component']} nodes")
+```
+
+---
+
+### `graph_analyseDatabase`
+
+Composite analysis — runs root object discovery, connected component analysis, cycle detection, and BFS wave planning in a **single MCP call** with **one shared edge fetch**.
+
+Use this instead of calling the four individual tools when you need two or more of those analyses together. It eliminates the scalability bottleneck of serial MCP round-trips (4 SQL fetches → 1; 4 MCP responses → 1).
+
+#### Parameters
+
+| Parameter | Type | Default | Required | Description |
+|-----------|------|---------|----------|-------------|
+| `container_pattern` | string | — | ✅ | CSV LIKE patterns for container scope |
+| `exclude_objects` | string | `''` | ❌ | CSV LIKE patterns to exclude |
+| `top_n_roots` | integer | `4` | ❌ | Number of top root objects (by downstream impact) to include in BFS wave analysis |
+| `max_depth_down` | integer | `10` | ❌ | Maximum downstream BFS hops from roots |
+| `max_depth_up` | integer | `0` | ❌ | Maximum upstream BFS hops. `0` = skip upstream. |
+| `edge_repository` | string | — | ✅ | Edge repository conforming to the Graph Edge Contract |
+
+#### Example
+
+```python
+# Full database readiness assessment — one call
+result = handle_graph_analyseDatabase(
+ conn=connection,
+ container_pattern="MY_DB%",
+ top_n_roots=6,
+ max_depth_down=10,
+ edge_repository="MY_PROJECT_Semantic.EdgeRepository"
+)
+
+root_count = result['results']['root_objects']['summary']['total_root_objects']
+cycle_count = result['results']['cycles']['stats'][0]['Cycle_Count']
+comp_count = result['results']['components']['stats'][0]['Component_Count']
+bfs_nodes = result['results']['bfs_waves']['summary']['total_nodes']
+total_ms = result['results']['edge_stats']['total_time_ms']
+
+print(f"{root_count} roots | {cycle_count} cycles | "
+ f"{comp_count} components | {bfs_nodes} BFS nodes | {total_ms}ms")
+```
+
+---
+
+## Architecture
+
+### Python/SQL Design
+
+The only Teradata privilege required across the entire package is `SELECT` on the edge repository view or table.
+
+| Tool | Implementation strategy |
+|------|------------------------|
+| `graph_edgeContractDDL` | Pure template generation — no SQL executed |
+| `graph_findRootObjects` | Single SQL SELECT with NOT EXISTS subquery |
+| `graph_bfsLevels` | One bulk edge SELECT; standard queue-based BFS (O(V+E)) in Python |
+| `graph_traceLineage` | Python constructs recursive CTEs; traversal runs server-side in Teradata spool |
+| `graph_detectCycles` | One scoped edge SELECT; Union-Find WCC + iterative DFS in Python |
+| `graph_connectedComponents` | One scoped edge SELECT; path-compressed Union-Find in Python |
+| `graph_analyseDatabase` | One shared edge SELECT; all four algorithms run in Python |
+
+### Progressive Disclosure
+
+The package supports both MCP registration modes simultaneously:
+
+- **Static mode:** `graph_tools.py` → `GRAPH_TOOLS` list → MCP server registration at startup
+- **Progressive Disclosure mode:** `__init__.py` → ModuleLoader discovers `handle_*` functions → `ContextCatalog` registers them using docstrings
+
+In Progressive Disclosure mode the ContextCatalog uses the function docstrings for both approximate-match summaries and exact-match full documentation. The `*_TOOL` descriptor dicts serve static mode only.
+
+---
+
+## Dependencies
+
+### Teradata
+
+| Requirement | Details |
+|------------|---------|
+| `SELECT` on edge repository | The only privilege required — applies to all tools |
+| Edge repository | A table or view conforming to the Graph Edge Contract.<br>Generate one with `graph_edgeContractDDL`, or use an existing `{ProductName}_Semantic.lineage_graph` view. |
+
+No server-side DDL objects required.
+
+### Python
+
+All packages are standard library or already included in the base MCP server:
+
+| Package | Used by | Source |
+|---------|---------|--------|
+| `teradatasql` | All tools | MCP server base |
+| `collections` | `graph_bfsLevels`, `graph_analyseDatabase` | Standard library |
+| `fnmatch` | `graph_bfsLevels` | Standard library |
+| `logging` | All tools | Standard library |
+
+---
+
+## Installation
+
+### File Placement
+
+```
+teradata_mcp_server/tools/
+├── graph_tools.py
+├── graph/
+│ ├── __init__.py
+│ ├── _graph_utils.py
+│ ├── graph_edge_contract.py
+│   ├── graph_find_root_objects.py
+│   ├── graph_bfs_levels.py
+│   ├── graph_trace_lineage.py
+│   ├── graph_detect_cycles.py
+│   ├── graph_connected_components.py
+│   └── graph_analyse_database.py
+└── utils.py
+```
+
+### Configuration
+
+Add to your `profiles.yml`:
+
+```yaml
+graph:
+  # Each entry is a regular expression matched against the registered
+  # tool, prompt, or resource name.
+  tool:
+    - ^graph_.*
+  prompt:
+    - ^graph_.*
+  # Exposes the graph://edge-contract resource.
+  resource:
+    - ^graph_edge_contract$
+```
+
+---
+
+## Performance
+
+### Key Principles
+
+**Always supply `include_containers` for `graph_bfsLevels`** — this filter is pushed into the SQL WHERE clause, dramatically reducing edge fetch volume. Without it, every edge in the repository is fetched. One additional LIKE pattern costs almost nothing; fetching a million irrelevant edges costs significantly.
+
+**Use `graph_analyseDatabase` when you need multiple analyses** — it runs four analyses from one edge fetch instead of four separate fetches.
+
+**Start with `max_depth_up=3` / `max_depth_down=3` (the defaults) for `graph_traceLineage`** — incrementally increase only if needed. Recursive CTE depth directly affects server-side spool consumption.
+
+**Use `exclude_objects` aggressively** — filter out sandbox schemas, temporary objects, and personal schemas. Document and version-control your team's standard exclusion patterns.
+
+**Run `graph_detectCycles` before wave planning** — a cycle will cause topological sort to hang silently.
+
+---
+
+## Troubleshooting
+
+| Issue | Cause | Solution |
+|-------|-------|----------|
+| **Empty BFS results** | Root node FQ name incorrect | Verify exact name via `graph_findRootObjects` — no wildcards in `root_node_list` |
+| **`upstream_level` always None** | Correct behaviour for root objects | Root objects with in-degree zero have no upstream sources — this is expected |
+| **Large edge fetch for BFS** | No `include_containers` specified | Always supply `include_containers` when scope is known |
+| **Query timeout** | Depth too high or large graph | Reduce `max_depth_up` / `max_depth_down`, or add `exclude_objects` / `include_containers` |
+| **`edge_repository` error** | Parameter not supplied | Pass the FQ name of your edge repository. AI-Native Data Products: `{ProductName}_Semantic.lineage_graph`. Otherwise run `graph_edgeContractDDL` first. |
+| **NULL check violations** | Edge repository has NULL required columns | Run the validation query from the `graph_edgeContractDDL` sample DML output |
+
+### Debug Steps
+
+```python
+# 1. Verify object exists and find exact FQ name
+result = handle_graph_findRootObjects(
+ conn=connection,
+ container_pattern="MY_DB_STD_T",
+ edge_repository="MY_PROJECT_Semantic.EdgeRepository"
+)
+# Check result for the exact FullyQualifiedName
+
+# 2. Test BFS with minimal scope and shallow depth
+result = handle_graph_bfsLevels(
+ conn=connection,
+ root_node_list="MY_DB_STD_T.my_root_table",
+ max_depth_down=2,
+ edge_repository="MY_PROJECT_Semantic.EdgeRepository"
+)
+
+# 3. Check cycle-free before wave planning
+result = handle_graph_detectCycles(
+ conn=connection,
+ container_pattern="MY_DB%",
+ edge_repository="MY_PROJECT_Semantic.EdgeRepository"
+)
+print(result['results']['summary_stats'][0]['Summary_Message'])
+
+# 4. Validate edge repository conforms to contract
+# (Run the validation query from graph_edgeContractDDL sample_dml output)
+base_readQuery(sql="""
+ SELECT 'NULL_CHECK' AS Validation, COUNT(*) AS Violations
+ FROM MY_PROJECT_Semantic.EdgeRepository
+ WHERE Src_Container_Name IS NULL
+ OR Src_Object_Name IS NULL
+ OR Src_Kind IS NULL
+ OR Tgt_Container_Name IS NULL
+ OR Tgt_Object_Name IS NULL
+ OR Tgt_Kind IS NULL
+""")
+```
+
+---
+
+## Best Practices
+
+1. **Always run `graph_detectCycles` before migration planning** — a cycle will cause topological sort to hang silently.
+
+2. **Use `graph_findRootObjects` to seed `graph_bfsLevels`** — never guess root node names; they must be exact FQ names with no wildcards.
+
+3. **Always supply `include_containers` for `graph_bfsLevels`** — without it, every edge in the repository is fetched regardless of scope.
+
+4. **Deploy in `downstream_level` ascending order within each wave** — depth 0 (root) first, then +1, +2, and so on. Never deploy a consumer before its dependency.
+
+5. **Check `cycle_candidates` in BFS results** — `direction='BOTH'` nodes with unequal absolute levels indicate back-edges. Investigate before treating them as simple dependents.
+
+6. **Prefer `graph_analyseDatabase` for full readiness assessments** — one call, one edge fetch, four analyses.
+
+---
+
+## Future Enhancements
+
+| Tool | Status | Notes |
+|------|--------|-------|
+| `graph_edgeContractDDL` | ✅ v1.1 | Graph Edge Contract v1.1 — optional enrichment columns |
+| `graph_findRootObjects` | ✅ v1.1 | |
+| `graph_bfsLevels` | ✅ v2.0 | SP replaced by pure-Python BFS |
+| `graph_traceLineage` | ✅ v1.0 | Renamed from `graph_queryDependenciesAgent` |
+| `graph_detectCycles` | ✅ v2.0 | SP replaced by Python Union-Find + iterative DFS |
+| `graph_connectedComponents` | ✅ v2.0 | SP replaced by Python Union-Find |
+| `graph_analyseDatabase` | ✅ v1.0 | Composite single-fetch analysis |
+| `graph_findOrphanedObjects` | 🔲 Planned | Objects with no upstream or downstream |
+| `graph_calculateMetrics` | 🔲 Planned | Centrality, clustering coefficient |
+| `graph_suggestRefactoring` | 🔲 Planned | Structure-based refactoring opportunities |
+
+---
+
+## Version History
+
+### 3.0 (2026-04-10)
+
+Compliance pass, Graph Edge Contract v1.1, SP-free architecture for all tools.
+
+- **Rename:** `graph_queryDependenciesAgent` → `graph_traceLineage`. The tool is a deterministic recursive CTE query, not an agent.
+- **New tools:** `graph_edgeContractDDL` (DDL generator + canonical contract text) and `graph_analyseDatabase` (composite single-fetch analysis).
+- **SP-free:** `graph_detectCycles` and `graph_connectedComponents` converted from SP-based to pure-Python (Union-Find WCC + iterative DFS). No stored procedures remain anywhere in the package.
+- **Graph Edge Contract v1.1:** Column names corrected from `SrcContainer`/`SrcObject`/`SrcKind` to `Src_Container_Name`/`Src_Object_Name`/`Src_Kind` (and Tgt equivalents) — prior generated tables were incompatible with the tool SQL. Optional enrichment columns `Edge_Relationship` and `Transformation_Type` added. `Src_Kind`/`Tgt_Kind` COMPRESS lists expanded to cover both single-letter codes and full-word values.
+- **Parameter standardisation:** `object_dependency_table` → `edge_repository`; `excl_patterns` → `exclude_objects` across `graph_detectCycles` and `graph_connectedComponents`.
+- **Dead parameter removal:** `strategy` and `max_edges_for_cte` removed from `graph_detectCycles`.
+- **Helper consolidation (phase 1):** `parse_csv_patterns` and `build_like_or` extracted to `_graph_utils.py`; 10 local copies removed across 6 files.
+- **AI-Native Data Product convention:** `{ProductName}_Semantic.lineage_graph` (Observability Module v1.5) documented as a ready-to-use edge repository requiring no DDL generation.
+- Progressive Disclosure compliance: all 7 tools registered in `GRAPH_TOOLS`; `GRAPH_EDGE_CONTRACT_DDL_TOOL` descriptor added.
+
+### 2.0 (2026-03-31)
+
+Major refactor — modular package structure, SP replaced by Python BFS for `graph_bfsLevels`.
+
+- Split monolithic `graph_tools.py` into one file per tool under `graph/` sub-package
+- `graph_tools.py` reduced to a thin registration hub
+- `graph_bfsLevels` SP replaced by pure-Python BFS — no stored procedure, one SQL round-trip, standard queue-based BFS (O(V+E))
+- BFS traversal direction fix: upstream BFS now correctly uses reverse adjacency (Tgt→Src)
+- Shared BFS helpers extracted to `graph/_graph_utils.py`
+
+### 1.3 (2026-01-15)
+
+Added `graph_connectedComponents` — Weakly Connected Component analysis.
+
+### 1.2 (2025-12-01)
+
+Added `graph_detectCycles` — WCC-partitioned cycle detection.
+
+### 1.1 (2025-03-05)
+
+Added `graph_findRootObjects` — root object discovery with CSV pattern support, object type filtering, and two return formats.
+
+### 1.0 (2025-03-04)
+
+Initial release — `graph_queryDependenciesAgent` (now `graph_traceLineage`): bidirectional dependency analysis via server-side recursive CTEs.
diff --git a/src/teradata_mcp_server/tools/graph/__init__.py b/src/teradata_mcp_server/tools/graph/__init__.py
new file mode 100644
index 0000000..abffc58
--- /dev/null
+++ b/src/teradata_mcp_server/tools/graph/__init__.py
@@ -0,0 +1,44 @@
+# graph/__init__.py
+"""
+Graph analysis tools package for dependency graph analysis.
+
+This __init__.py re-exports all handle_* functions from the individual
+tool modules so that the MCP server's ModuleLoader can discover them
+via inspect.getmembers() when it loads this package.
+
+The ModuleLoader (module_loader.py) maps the 'graph' prefix to
+'teradata_mcp_server.tools.graph' and then calls:
+
+ module = importlib.import_module('teradata_mcp_server.tools.graph')
+ for name, func in inspect.getmembers(module, inspect.isfunction):
+ all_functions[name] = func
+
+If the handle_* functions are not importable at the package level,
+the ModuleLoader finds nothing and no graph tools are registered.
+
+Logical workflow (the imports below are sorted alphabetically, not in this order):
+ findRootObjects → bfsLevels → traceLineage
+ → detectCycles → connectedComponents → analyseDatabase (composite) → edgeContractDDL
+
+Author: Paul Dancer — Teradata Consulting Services
+"""
+
+# ── Step 6: Composite analysis (single call, shared edge fetch) ──
+from .graph_analyse_database import handle_graph_analyseDatabase
+
+# ── Step 2: BFS wave planning (pure Python) ───────────────────────
+from .graph_bfs_levels import handle_graph_bfsLevels
+
+# ── Step 5: Connected components (Python Union-Find WCC) ─────────
+from .graph_connected_components import handle_graph_connectedComponents
+
+# ── Step 4: Cycle detection (Python Union-Find + iterative DFS) ──
+from .graph_detect_cycles import handle_graph_detectCycles
+
+# ── Step 7: Edge contract DDL generator (no DB connection needed) ─
+from .graph_edge_contract import handle_graph_edgeContractDDL
+# ── Step 1: Root object discovery (SQL-only) ──────────────────────
+from .graph_find_root_objects import handle_graph_findRootObjects
+
+# ── Step 3: Full lineage / impact analysis (hybrid CTE) ──────────
+from .graph_trace_lineage import handle_graph_traceLineage
diff --git a/src/teradata_mcp_server/tools/graph/_graph_utils.py b/src/teradata_mcp_server/tools/graph/_graph_utils.py
new file mode 100644
index 0000000..179f1e7
--- /dev/null
+++ b/src/teradata_mcp_server/tools/graph/_graph_utils.py
@@ -0,0 +1,196 @@
+"""
+_graph_utils.py — Shared utility functions for graph analysis tools.
+
+This module is INTERNAL to the graph tool package (the leading underscore
+signals this): it is not registered as an MCP tool or imported by the server
+directly. It lets the tool files share one copy of the CSV/SQL and BFS helpers.
+
+Contents:
+ parse_csv_patterns — Split a CSV pattern string into trimmed, non-empty tokens
+ build_like_or — Build an OR-joined LIKE predicate for one SQL column
+ bfs_safe_int — Safe int conversion for nullable level columns
+ create_bfs_summary — Summary statistics from a BFS node result list
+ extract_cycle_candidates — Extract direction='BOTH' nodes as cycle candidates
+
+The three BFS helpers were originally private functions (_bfs_safe_int,
+_create_bfs_summary, _extract_cycle_candidates) embedded in the monolithic
+graph_tools.py and are lifted here unchanged; parse_csv_patterns and
+build_like_or replace the local copies the tool files used to carry.
+
+Author: Paul Dancer — Teradata Global Field Tech
+"""
+
+
+def parse_csv_patterns(csv_str: str) -> list[str]:
+ """
+ Split a CSV pattern string into a list of trimmed, non-empty tokens.
+
+ Used by all graph tools to normalise container_pattern, exclude_objects,
+ include_containers, root_node_list, and similar CSV inputs before use.
+
+ Arguments:
+ csv_str - Comma-separated string (may contain whitespace around commas,
+ or be empty / None)
+
+ Returns:
+ List of trimmed non-empty strings; empty list if csv_str is blank or None
+ """
+ return [p.strip() for p in (csv_str or "").split(",") if p.strip()]
+
+
+def build_like_or(patterns: list[str], column: str) -> str:
+ """
+ Build a parenthesised OR-joined LIKE clause for a SQL WHERE predicate.
+
+ Used by graph tools to construct container-scoping predicates against a
+ single SQL column (typically Src_Container_Name or Tgt_Container_Name).
+
+ Arguments:
+ patterns - List of SQL LIKE pattern strings (e.g. ['%SALES%', '%FIN%'])
+ column - SQL column reference (e.g. 'Src_Container_Name')
+
+ Returns:
+ SQL fragment of the form "(col LIKE 'A%' OR col LIKE 'B%')".
+ Callers must ensure patterns is non-empty before calling — an empty
+ list produces the degenerate string "()" which is invalid SQL.
+ """
+ clauses = [f"{column} LIKE '{p}'" for p in patterns]
+ return "(" + " OR ".join(clauses) + ")"
+
+
+def bfs_safe_int(value) -> int | None:
+ """
+ Safely convert a value to int, returning None if conversion fails.
+
+ Used for upstream_level and downstream_level columns which may be None
+ (NULL from Teradata) when a node is unreachable in one direction.
+
+ Arguments:
+ value - Any value from a node dict or Teradata result row
+
+ Returns:
+ int or None
+ """
+ if value is None:
+ return None
+ try:
+ return int(value)
+ except (ValueError, TypeError):
+ return None
+
+
+def create_bfs_summary(nodes: list, cycle_candidates: list) -> dict:
+ """
+ Create summary statistics from a BFS node result list.
+
+ cycle_candidates is passed in from the caller rather than being
+ computed internally — extract_cycle_candidates is called once in
+ the handler and the result is shared here and in response_data,
+ avoiding a redundant second pass over the node list.
+
+ Arguments:
+ nodes - List of node dicts (one per reachable node)
+ cycle_candidates - Pre-computed list from extract_cycle_candidates
+
+ Returns:
+ Dictionary with counts by direction and depth extremes:
+ total_nodes, root_nodes, upstream_only, downstream_only,
+ both_directions, cycle_candidates, max_upstream_depth,
+ max_downstream_depth, nodes_per_nearest_root, object_kind_counts
+ """
+ root_nodes = [n for n in nodes if n.get("is_root") == "Y"]
+ upstream_nodes = [n for n in nodes if n.get("direction") == "U"]
+ downstream_nodes = [n for n in nodes if n.get("direction") == "D"]
+ both_nodes = [n for n in nodes if n.get("direction") == "BOTH"]
+ cycle_cands = cycle_candidates
+
+ # Deepest upstream level (most negative → largest absolute value)
+ up_levels = [
+ abs(bfs_safe_int(n.get("upstream_level")) or 0)
+ for n in nodes
+ if bfs_safe_int(n.get("upstream_level")) is not None
+ ]
+
+ # Deepest downstream level (most positive)
+ down_levels = [
+ bfs_safe_int(n.get("downstream_level")) or 0
+ for n in nodes
+ if bfs_safe_int(n.get("downstream_level")) is not None
+ ]
+
+ # Nearest root grouping — how many nodes per root
+ root_groups: dict[str, int] = {}
+ for n in nodes:
+ nearest = n.get("nearest_root")
+ if nearest:
+ root_groups[nearest] = root_groups.get(nearest, 0) + 1
+
+ # Object kind breakdown
+ kind_counts: dict[str, int] = {}
+ for n in nodes:
+ kind = n.get("object_kind") or "Unknown"
+ kind_counts[kind] = kind_counts.get(kind, 0) + 1
+
+ return {
+ "total_nodes": len(nodes),
+ "root_nodes": len(root_nodes),
+ "upstream_only": len(upstream_nodes),
+ "downstream_only": len(downstream_nodes),
+ "both_directions": len(both_nodes),
+ "cycle_candidates": len(cycle_cands),
+ "max_upstream_depth": max(up_levels, default=0),
+ "max_downstream_depth": max(down_levels, default=0),
+ "nodes_per_nearest_root": root_groups,
+ "object_kind_counts": kind_counts,
+ }
+
+
+def extract_cycle_candidates(nodes: list) -> list:
+ """
+ Extract nodes that are reachable in both directions and flag those with
+ unequal absolute upstream and downstream levels as likely cycles.
+
+ A node with direction='BOTH' and abs(upstream_level) != downstream_level
+ is a cycle candidate — the asymmetry indicates a back-edge in the graph,
+ which is the hallmark of a circular reference when traversing the
+ object dependency graph.
+
+ Nodes with direction='BOTH' and equal absolute levels are shared
+ dependencies (reachable in both directions at the same hop count)
+ and are included with cycle_likely=False for completeness.
+
+ Arguments:
+ nodes - List of node dicts
+
+ Returns:
+ List of cycle candidate dicts enriched with:
+ cycle_likely - True if abs(upstream_level) != downstream_level
+ upstream_abs - Absolute value of upstream_level for easy comparison
+ """
+ candidates = []
+
+ for n in nodes:
+ if n.get("direction") != "BOTH":
+ continue
+
+ up_level = bfs_safe_int(n.get("upstream_level"))
+ down_level = bfs_safe_int(n.get("downstream_level"))
+
+ if up_level is None or down_level is None:
+ continue
+
+ up_abs = abs(up_level)
+ cycle_likely = up_abs != down_level
+
+ candidates.append(
+ {
+ **n,
+ "upstream_abs": up_abs,
+ "cycle_likely": cycle_likely,
+ }
+ )
+
+ # Sort: most likely cycles first (asymmetric), then by node name
+ candidates.sort(key=lambda x: (not x["cycle_likely"], x.get("node", "")))
+
+ return candidates
diff --git a/src/teradata_mcp_server/tools/graph/graph_analyse_database.py b/src/teradata_mcp_server/tools/graph/graph_analyse_database.py
new file mode 100644
index 0000000..da6a5b3
--- /dev/null
+++ b/src/teradata_mcp_server/tools/graph/graph_analyse_database.py
@@ -0,0 +1,880 @@
+"""
+graph_analyse_database.py — Composite graph analysis tool.
+
+Provides handle_graph_analyseDatabase and GRAPH_ANALYSE_DATABASE_TOOL.
+
+Runs all four core graph analyses in a single MCP tool call:
+ 1. Root object discovery (findRootObjects logic)
+ 2. Connected component analysis (connectedComponents logic)
+ 3. Cycle detection (detectCycles logic)
+ 4. BFS deployment waves (bfsLevels logic)
+
+CRITICAL SCALABILITY ADVANTAGE:
+ The four individual tools each independently fetch the edge set from
+ Teradata via SQL — that is 4 round-trips fetching the same rows.
+ This composite tool fetches the edge set ONCE and shares it across
+ all four analyses in memory. On a graph with 100 000 edges, this
+ eliminates ~3 redundant network transfers and ~3 redundant SQL scans.
+
+ Additionally, the composite tool returns ONE MCP response instead of
+ four, eliminating 3 stdio/JSON serialisation round-trips through the
+ Claude Desktop MCP transport layer — the primary source of latency
+ observed at small scale.
+
+SP-free: all computation runs in the MCP server process. The only
+Teradata privilege required is SELECT on the edge repository view.
+
+If you don't have an edge repository yet, call graph_edgeContractDDL first to generate the CREATE TABLE or CREATE VIEW DDL for one.
+
+Author: Paul Dancer — Teradata Global Field Tech
+"""
+
+import logging
+import time
+from collections import defaultdict, deque
+from collections.abc import Iterator
+
+from teradatasql import TeradataConnection
+
+from teradata_mcp_server.tools.graph._graph_utils import (
+ bfs_safe_int,
+ build_like_or,
+ create_bfs_summary,
+ extract_cycle_candidates,
+ parse_csv_patterns,
+)
+from teradata_mcp_server.tools.utils import create_response
+
+logger = logging.getLogger("teradata_mcp_server")
+
+
+# ═══════════════════════════════════════════════════════════════════
+# Shared helpers
+# ═══════════════════════════════════════════════════════════════════
+# parse_csv_patterns and build_like_or are imported from _graph_utils.
+
+
+def _build_excl_where(excl_patterns: list[str]) -> str:
+ """
+ Build exclusion predicates (matched against the Src_* columns only) for a SQL WHERE clause.
+
+ Supports both database-only patterns ('SANDBOX%') and fully-qualified
+ patterns ('DB.Object%') containing a dot separator.
+
+ Arguments:
+ excl_patterns - List of exclusion LIKE patterns
+
+ Returns:
+ SQL fragment starting with ' AND NOT (...)', or '' if no patterns
+ """
+ if not excl_patterns:
+ return ""
+ clauses = []
+ for p in excl_patterns:
+ if "." in p:
+ db_part, obj_part = p.split(".", 1)
+ clauses.append(f"(Src_Container_Name LIKE '{db_part}' AND Src_Object_Name LIKE '{obj_part}')")
+ else:
+ clauses.append(f"Src_Container_Name LIKE '{p}'")
+ return " AND NOT (" + " OR ".join(clauses) + ")"
+
+
+# ═══════════════════════════════════════════════════════════════════
+# Union-Find (path-compressed, union-by-rank)
+# ═══════════════════════════════════════════════════════════════════
+
+
+class _UnionFind:
+ """
+ Path-compressed Union-Find for connected component detection.
+
+ Provides near-constant-time union and find operations (O(α(N))
+ amortised per operation via path compression and union-by-rank).
+ """
+
+ def __init__(self):
+ """Initialise empty Union-Find structure."""
+ self._parent: dict[str, str] = {}
+ self._rank: dict[str, int] = {}
+
+ def find(self, x: str) -> str:
+ """
+ Find the root representative of x with path compression.
+
+ Arguments:
+ x - Node identifier
+
+ Returns:
+ Root representative of x's component
+ """
+ if x not in self._parent:
+ self._parent[x] = x
+ self._rank[x] = 0
+ while self._parent[x] != x:
+ self._parent[x] = self._parent[self._parent[x]]
+ x = self._parent[x]
+ return x
+
+ def union(self, a: str, b: str) -> None:
+ """
+ Merge the components containing a and b (union-by-rank).
+
+ Arguments:
+ a - First node identifier
+ b - Second node identifier
+ """
+ ra, rb = self.find(a), self.find(b)
+ if ra == rb:
+ return
+ if self._rank[ra] < self._rank[rb]:
+ ra, rb = rb, ra
+ self._parent[rb] = ra
+ if self._rank[ra] == self._rank[rb]:
+ self._rank[ra] += 1
+
+ def components(self) -> dict[str, list[str]]:
+ """
+ Return all components as {root: [members]} dict.
+
+ Returns:
+ Dictionary mapping component root to sorted member list
+ """
+ comps: dict[str, list[str]] = defaultdict(list)
+ for node in self._parent:
+ comps[self.find(node)].append(node)
+ return {k: sorted(v) for k, v in comps.items()}
+
+
+# ═══════════════════════════════════════════════════════════════════
+# Iterative DFS cycle detection
+# ═══════════════════════════════════════════════════════════════════
+
+
+def _find_cycles_dfs(nodes: set, adj: dict[str, list[str]]) -> list[list[str]]:
+ """
+ Find directed cycles via iterative DFS (grey/black colouring), one per back edge.
+
+ Iterative approach avoids Python's recursion limit on deep graphs.
+
+ Arguments:
+ nodes - Set of node FQ names in this component
+ adj - Adjacency list {src: [tgt, ...]}
+
+ Returns:
+ List of cycles; each cycle is a list of FQ names (start == end)
+ """
+ white, grey, black = 0, 1, 2
+ colour: dict[str, int] = {}
+ cycles: list[list[str]] = []
+
+ for start in nodes:
+ if colour.get(start) == black:
+ continue
+ stack: list[tuple[str, Iterator[str], list[str]]] = [(start, iter(adj.get(start, [])), [start])]
+ colour[start] = grey
+
+ while stack:
+ node, neighbours, path = stack[-1]
+ try:
+ nxt = next(neighbours)
+ if colour.get(nxt) == grey:
+ idx = path.index(nxt)
+ cycles.append(path[idx:] + [nxt])
+ elif colour.get(nxt) != black:
+ colour[nxt] = grey
+ stack.append((nxt, iter(adj.get(nxt, [])), path + [nxt]))
+ except StopIteration:
+ colour[node] = black
+ stack.pop()
+
+ return cycles
+
+
+# ═══════════════════════════════════════════════════════════════════
+# BFS engine
+# ═══════════════════════════════════════════════════════════════════
+
+
+def _run_bfs(
+ root_fqs: list[str],
+ fwd_adj: dict[str, list[str]],
+ rev_adj: dict[str, list[str]],
+ node_meta: dict[str, dict],
+ max_depth_down: int,
+ max_depth_up: int,
+) -> dict:
+ """
+ Run multi-source BFS from the given roots on the in-memory edge set.
+
+ Arguments:
+ root_fqs - List of root node fully-qualified names
+ fwd_adj - Forward adjacency {src: [tgt, ...]} (for downstream)
+ rev_adj - Reverse adjacency {tgt: [src, ...]} (for upstream)
+ node_meta - {fq: {container, object, kind}} metadata lookup
+ max_depth_down - Maximum downstream hops
+ max_depth_up - Maximum upstream hops
+
+ Returns:
+ Dict with 'nodes', 'cycle_candidates', 'summary' keys
+ """
+ down_level: dict[str, int] = {}
+ up_level: dict[str, int] = {}
+ nearest_root: dict[str, str] = {}
+
+ # ── Seed roots at level 0 ──
+ for r in root_fqs:
+ down_level[r] = 0
+ up_level[r] = 0
+ nearest_root[r] = r
+
+ # ── Downstream BFS (forward: src → tgt) ──
+ if max_depth_down > 0:
+ queue: deque[tuple[str, int, str]] = deque()
+ for r in root_fqs:
+ queue.append((r, 0, r))
+ while queue:
+ node, depth, root = queue.popleft()
+ for tgt in fwd_adj.get(node, []):
+ if tgt not in down_level:
+ new_depth = depth + 1
+ if new_depth <= max_depth_down:
+ down_level[tgt] = new_depth
+ nearest_root[tgt] = root
+ queue.append((tgt, new_depth, root))
+
+ # ── Upstream BFS (reverse: tgt → src) ──
+ if max_depth_up > 0:
+ queue = deque()
+ for r in root_fqs:
+ queue.append((r, 0, r))
+ while queue:
+ node, depth, root = queue.popleft()
+ for src in rev_adj.get(node, []):
+ if src not in up_level:
+ new_depth = depth + 1
+ if new_depth <= max_depth_up:
+ up_level[src] = -(new_depth)
+ if src not in nearest_root:
+ nearest_root[src] = root
+ queue.append((src, new_depth, root))
+
+ # ── Assemble node list ──
+ root_set = set(root_fqs)
+ all_reached = set(down_level.keys()) | set(up_level.keys())
+ nodes = []
+ for fq in sorted(all_reached):
+ is_root = fq in root_set
+ d_val = down_level.get(fq)
+ u_val = up_level.get(fq)
+
+ if is_root:
+ direction = "ROOT"
+ elif d_val is not None and u_val is not None:
+ direction = "BOTH"
+ elif u_val is not None:
+ direction = "U"
+ else:
+ direction = "D"
+
+ meta = node_meta.get(fq, {})
+ nodes.append(
+ {
+ "node": fq,
+ "container_name": meta.get("container", fq.split(".")[0] if "." in fq else ""),
+ "object_name": meta.get("object", fq.split(".")[1] if "." in fq else fq),
+ "object_kind": meta.get("kind", "Unknown"),
+ "upstream_level": u_val if not is_root else 0,
+ "downstream_level": d_val if d_val is not None else (0 if is_root else None),
+ "nearest_root": nearest_root.get(fq, ""),
+ "direction": direction,
+ "is_root": "Y" if is_root else "N",
+ }
+ )
+
+ cycle_cands = extract_cycle_candidates(nodes)
+ summary = create_bfs_summary(nodes, cycle_cands)
+
+ return {
+ "nodes": nodes,
+ "cycle_candidates": cycle_cands,
+ "summary": summary,
+ }
+
+
+# ═══════════════════════════════════════════════════════════════════
+# Public handler
+# ═══════════════════════════════════════════════════════════════════
+
+
+def handle_graph_analyseDatabase(
+ conn: TeradataConnection,
+ container_pattern: str,
+ exclude_objects: str = "",
+ top_n_roots: int = 4,
+ max_depth_down: int = 10,
+ max_depth_up: int = 0,
+ edge_repository: str = "",
+ tool_name: str | None = None,
+ *args,
+ **kwargs,
+):
+ """
+ Composite graph analysis — runs findRootObjects, connectedComponents,
+ detectCycles, and bfsLevels in a single MCP call with ONE shared
+ edge fetch.
+
+ This tool eliminates the scalability bottleneck of serial MCP round-
+ trips by combining four graph analyses that would otherwise require
+ four separate tool calls, each independently fetching the same edge
+ set from Teradata.
+
+ Performance vs individual tools:
+ - 1 SQL round-trip instead of 4 (shared edge fetch)
+ - 1 MCP response instead of 4 (eliminates stdio serialisation overhead)
+ - Same algorithmic complexity (O(V+E) BFS, O(α·N) Union-Find, O(V+E) DFS)
+ - In-memory edge sharing: all analyses operate on the same Python list
+
+ Use this for:
+ - Full database migration readiness assessment
+ - Pre-migration cycle + root + wave analysis in one call
+ - Dashboard data population (all four analyses needed simultaneously)
+ - Any workflow that would otherwise call 3+ individual graph tools
+
+ Arguments:
+ container_pattern - str: CSV LIKE patterns for container scope.
+ Supports wildcards (%) and CSV format.
+ Examples: '%SALES%', '%SALES%,%FINANCE%', 'PROD_%'
+
+ CRITICAL: STRING type, not array.
+ CORRECT: container_pattern="%SALES%,%FINANCE%"
+ WRONG: container_pattern=["%SALES%", "%FINANCE%"]
+
+ exclude_objects - str: CSV LIKE patterns to exclude.
+ Default: '' (no exclusions)
+
+ top_n_roots - int: Number of top root objects (by downstream
+ dependent count) to include in BFS wave analysis.
+ Default: 4
+
+ max_depth_down - int: Maximum downstream BFS hops from roots.
+ Default: 10
+
+ max_depth_up - int: Maximum upstream BFS hops from roots.
+ 0 = skip upstream analysis.
+ Default: 0
+
+ edge_repository - str: Edge repository view/table conforming to the
+ Graph Edge Contract (Src_Container_Name,
+ Src_Object_Name, Src_Kind, Tgt_Container_Name,
+ Tgt_Object_Name, Tgt_Kind columns).
+ Call graph_edgeContractDDL to generate one.
+ Required parameter — no default.
+
+ Returns:
+ ResponseType: single response containing all four analyses:
+
+ {
+ "root_objects": { "objects": [...], "summary": {...} },
+ "components": { "node_details": [...], "summaries": [...], "stats": [...] },
+ "cycles": { "details": [...], "summaries": [...], "stats": [...] },
+ "bfs_waves": { "nodes": [...], "cycle_candidates": [...], "summary": {...} },
+ "edge_stats": { "total_edges": N, "fetch_time_ms": N, "total_time_ms": N }
+ }
+
+ Example calls:
+ # Full analysis of Sales and Finance databases
+ handle_graph_analyseDatabase(
+ conn=connection,
+ container_pattern="%SALES%,%FINANCE%",
+ edge_repository="MY_LINEAGE_DB.EdgeRepository"
+ )
+
+ # Single database family with top 8 roots
+ handle_graph_analyseDatabase(
+ conn=connection,
+ container_pattern="%FINANCE%",
+ top_n_roots=8,
+ edge_repository="MY_LINEAGE_DB.EdgeRepository"
+ )
+
+ # Exclude sandbox schemas
+ handle_graph_analyseDatabase(
+ conn=connection,
+ container_pattern="PROD_%,STAGE_%",
+ exclude_objects="SANDBOX%,%.temp_%",
+ edge_repository="MY_LINEAGE_DB.EdgeRepository"
+ )
+ """
+ logger.debug(
+ "Tool: handle_graph_analyseDatabase: Args: "
+ "container_pattern=%s, exclude_objects=%s, top_n_roots=%d, "
+ "max_depth_down=%d, max_depth_up=%d, edge_repository=%s",
+ container_pattern,
+ exclude_objects,
+ top_n_roots,
+ max_depth_down,
+ max_depth_up,
+ edge_repository,
+ )
+
+ t_start = time.time()
+ container_patterns = parse_csv_patterns(container_pattern)
+ excl_patterns = parse_csv_patterns(exclude_objects)
+
+ if not container_patterns:
+ return create_response(
+ {"error": "container_pattern must not be empty"},
+ {"tool_name": tool_name or "graph_analyseDatabase", "status": "error"},
+ )
+
+ if not edge_repository:
+ return create_response(
+ {"error": "edge_repository is required. Call graph_edgeContractDDL to generate one."},
+ {"tool_name": tool_name or "graph_analyseDatabase", "status": "error"},
+ )
+
+ try:
+ # ═══════════════════════════════════════════════════════════
+ # STEP 0 — Single shared edge fetch (ONE SQL round-trip)
+ # ═══════════════════════════════════════════════════════════
+ container_where = build_like_or(container_patterns, "Src_Container_Name")
+ excl_where = _build_excl_where(excl_patterns)
+
+ edge_sql = f"""
+LOCKING ROW FOR ACCESS
+SELECT
+ TRIM(Src_Container_Name) AS SrcDB
+ ,TRIM(Src_Object_Name) AS SrcObj
+ ,Src_Kind AS SrcKind
+ ,TRIM(Tgt_Container_Name) AS TgtDB
+ ,TRIM(Tgt_Object_Name) AS TgtObj
+ ,Tgt_Kind AS TgtKind
+FROM {edge_repository}
+WHERE {container_where}
+ {excl_where}
+"""
+ logger.debug("Tool: handle_graph_analyseDatabase: Edge SQL:\n%s", edge_sql)
+
+ with conn.cursor() as cur:
+ cur.execute(edge_sql)
+ raw_edges = cur.fetchall()
+
+ t_fetch = time.time()
+ fetch_ms = round((t_fetch - t_start) * 1000)
+ edge_count = len(raw_edges)
+
+ logger.info("Tool: handle_graph_analyseDatabase: Fetched %d edges in %dms", edge_count, fetch_ms)
+
+ # ── Build in-memory structures shared by all analyses ──
+ # Forward adjacency: src → [tgt, ...] (directed: dependency → dependent)
+ fwd_adj: dict[str, list[str]] = defaultdict(list)
+ # Reverse adjacency: tgt → [src, ...] (for upstream BFS)
+ rev_adj: dict[str, list[str]] = defaultdict(list)
+ # Node metadata registry
+ node_meta: dict[str, dict] = {}
+ # Union-Find for connected components
+ uf = _UnionFind()
+ # Track downstream dependent counts for root discovery
+ src_nodes: dict[str, int] = defaultdict(int)
+ tgt_nodes: set[str] = set()
+
+ for src_db, src_obj, src_kind, tgt_db, tgt_obj, tgt_kind in raw_edges:
+ if not src_obj or not tgt_obj:
+ continue # Skip null edges
+
+ src_fq = f"{src_db}.{src_obj}"
+ tgt_fq = f"{tgt_db}.{tgt_obj}"
+
+ fwd_adj[src_fq].append(tgt_fq)
+ rev_adj[tgt_fq].append(src_fq)
+ uf.union(src_fq, tgt_fq)
+
+ # Count downstream dependents per source
+ src_nodes[src_fq] += 1
+ tgt_nodes.add(tgt_fq)
+
+ # Store node metadata
+ if src_fq not in node_meta:
+ node_meta[src_fq] = {
+ "container": src_db,
+ "object": src_obj,
+ "kind": src_kind or "Unknown",
+ }
+ if tgt_fq not in node_meta:
+ node_meta[tgt_fq] = {
+ "container": tgt_db,
+ "object": tgt_obj,
+ "kind": tgt_kind or "Unknown",
+ }
+
+ # ═══════════════════════════════════════════════════════════
+ # STEP 1 — Root objects (objects never appearing as targets)
+ # ═══════════════════════════════════════════════════════════
+ root_objects = []
+ for fq, downstream_count in src_nodes.items():
+ if fq not in tgt_nodes:
+ meta = node_meta.get(fq, {})
+ root_objects.append(
+ {
+ "DatabaseName": meta.get("container", ""),
+ "ObjectName": meta.get("object", ""),
+ "FullyQualifiedName": fq,
+ "ObjectType": meta.get("kind", "Unknown"),
+ "DownstreamDependentCount": downstream_count,
+ }
+ )
+
+ # Sort by downstream impact descending
+ root_objects.sort(key=lambda x: (-x["DownstreamDependentCount"], x["FullyQualifiedName"]))
+
+ # Summary statistics
+ type_counts: dict[str, int] = {}
+ db_counts: dict[str, int] = {}
+ for obj in root_objects:
+ t = obj["ObjectType"]
+ type_counts[t] = type_counts.get(t, 0) + 1
+ d = obj["DatabaseName"]
+ db_counts[d] = db_counts.get(d, 0) + 1
+
+ root_summary = {
+ "total_root_objects": len(root_objects),
+ "object_type_counts": type_counts,
+ "database_counts": db_counts,
+ "total_downstream_dependencies": sum(o["DownstreamDependentCount"] for o in root_objects),
+ }
+
+ t_roots = time.time()
+ logger.info(
+ "Tool: handle_graph_analyseDatabase: Found %d root objects in %dms",
+ len(root_objects),
+ round((t_roots - t_fetch) * 1000),
+ )
+
+ # ═══════════════════════════════════════════════════════════
+ # STEP 2 — Connected components (reuse Union-Find from step 0)
+ # ═══════════════════════════════════════════════════════════
+ raw_comps = uf.components()
+
+ # Assign sequential integer IDs sorted by descending size
+ sorted_roots = sorted(raw_comps.keys(), key=lambda r: -len(raw_comps[r]))
+ root_to_id = {r: i + 1 for i, r in enumerate(sorted_roots)}
+
+ comp_node_details = []
+ comp_id_map: dict[str, int] = {}
+ for root, members in raw_comps.items():
+ cid = root_to_id[root]
+ for fq in members:
+ comp_id_map[fq] = cid
+ meta = node_meta.get(fq, {})
+ comp_node_details.append(
+ {
+ "Node_FQ": fq,
+ "DatabaseName": meta.get("container", ""),
+ "ObjectName": meta.get("object", ""),
+ "Component_Id": cid,
+ "Object_Kind": meta.get("kind", "Unknown"),
+ }
+ )
+
+ comp_summaries = []
+ for root in sorted_roots:
+ cid = root_to_id[root]
+ members = raw_comps[root]
+ comp_summaries.append(
+ {
+ "Component_Id": cid,
+ "Node_Count": len(members),
+ "Node_List": ", ".join(members),
+ }
+ )
+
+ comp_stats = [
+ {
+ "Component_Count": len(raw_comps),
+ "Node_Count": len(comp_id_map),
+ "Edge_Count": edge_count,
+ "Largest_Component": max(len(m) for m in raw_comps.values()) if raw_comps else 0,
+ "Smallest_Component": min(len(m) for m in raw_comps.values()) if raw_comps else 0,
+ "Singleton_Count": sum(1 for m in raw_comps.values() if len(m) == 1),
+ "Summary_Message": (
+ f"{len(raw_comps)} connected component(s) identified "
+ f"across {len(comp_id_map)} node(s) and {edge_count} edge(s)."
+ ),
+ }
+ ]
+
+ t_comps = time.time()
+ logger.info(
+ "Tool: handle_graph_analyseDatabase: %d components in %dms",
+ len(raw_comps),
+ round((t_comps - t_roots) * 1000),
+ )
+
+ # ═══════════════════════════════════════════════════════════
+ # STEP 3 — Cycle detection (reuse adj + UF from step 0)
+ # ═══════════════════════════════════════════════════════════
+ all_cycles: list[list[str]] = []
+ components_scanned = 0
+
+ for root in sorted_roots:
+ cycle_members = set(raw_comps[root])
+ if len(cycle_members) < 2:
+ continue
+ components_scanned += 1
+ cycles = _find_cycles_dfs(cycle_members, fwd_adj)
+ all_cycles.extend(cycles)
+
+ # Deduplicate by canonical form (min rotation)
+ seen_canonical: set[tuple[str, ...]] = set()
+ unique_cycles: list[list[str]] = []
+ for cycle in all_cycles:
+ inner = cycle[:-1]
+ if not inner:
+ continue
+ min_idx = inner.index(min(inner))
+ canonical = tuple(inner[min_idx:] + inner[:min_idx])
+ if canonical not in seen_canonical:
+ seen_canonical.add(canonical)
+ unique_cycles.append(cycle)
+
+ # Build cycle details and summaries
+ cycle_details = []
+ cycle_summaries = []
+ cycle_node_set: set[str] = set()
+
+ for cycle_id, cycle in enumerate(unique_cycles, 1):
+ cycle_len = len(cycle) - 1
+ for pos, fq in enumerate(cycle[:-1], 1):
+ cycle_node_set.add(fq)
+ cycle_details.append(
+ {
+ "Cycle_Id": cycle_id,
+ "Cycle_Pos": pos,
+ "Node_FQ": fq,
+ "Cycle_Length": cycle_len,
+ "Component_Id": comp_id_map.get(fq, 0),
+ "Strategy": "DFS",
+ }
+ )
+ cycle_summaries.append(
+ {
+ "Cycle_Id": cycle_id,
+ "Cycle_Length": cycle_len,
+ "Component_Id": comp_id_map.get(cycle[0], 0),
+ "Strategy": "DFS",
+ "Cycle_Path": " -> ".join(cycle),
+ }
+ )
+
+ comps_with_cycles = len({cd["Component_Id"] for cd in cycle_details})
+
+ cycle_stats = [
+ {
+ "Cycle_Count": len(unique_cycles),
+ "Total_Nodes_In_Cycles": len(cycle_details),
+ "Unique_Nodes_In_Cycles": len(cycle_node_set),
+ "Components_With_Cycles": comps_with_cycles,
+ "Edge_Count": edge_count,
+ "Components_Scanned": components_scanned,
+ "Strategy_Used": "DFS",
+ "Summary_Message": (
+ f"{len(unique_cycles)} cycle(s) detected."
+ if unique_cycles
+ else "No cycles detected — graph is a DAG."
+ ),
+ }
+ ]
+
+ t_cycles = time.time()
+ logger.info(
+ "Tool: handle_graph_analyseDatabase: %d cycles in %dms",
+ len(unique_cycles),
+ round((t_cycles - t_comps) * 1000),
+ )
+
+ # ═══════════════════════════════════════════════════════════
+ # STEP 4 — BFS waves from top N root objects
+ # ═══════════════════════════════════════════════════════════
+ top_roots = root_objects[:top_n_roots]
+ top_root_fqs = [r["FullyQualifiedName"] for r in top_roots]
+
+ if top_root_fqs:
+ bfs_result = _run_bfs(
+ root_fqs=top_root_fqs,
+ fwd_adj=fwd_adj,
+ rev_adj=rev_adj,
+ node_meta=node_meta,
+ max_depth_down=max_depth_down,
+ max_depth_up=max_depth_up,
+ )
+ else:
+ bfs_result = {
+ "nodes": [],
+ "cycle_candidates": [],
+ "summary": {
+ "total_nodes": 0,
+ "root_nodes": 0,
+ "upstream_only": 0,
+ "downstream_only": 0,
+ "both_directions": 0,
+ "cycle_candidates": 0,
+ "max_upstream_depth": 0,
+ "max_downstream_depth": 0,
+ "nodes_per_nearest_root": {},
+ "object_kind_counts": {},
+ },
+ }
+
+ t_bfs = time.time()
+ logger.info(
+ "Tool: handle_graph_analyseDatabase: BFS %d nodes in %dms",
+ len(bfs_result["nodes"]),
+ round((t_bfs - t_cycles) * 1000),
+ )
+
+ # ═══════════════════════════════════════════════════════════
+ # Assemble composite response
+ # ═══════════════════════════════════════════════════════════
+ t_total = round((time.time() - t_start) * 1000)
+
+ response_data = {
+ "root_objects": {
+ "objects": root_objects,
+ "summary": root_summary,
+ },
+ "components": {
+ "node_details": comp_node_details,
+ "summaries": comp_summaries,
+ "stats": comp_stats,
+ },
+ "cycles": {
+ "details": cycle_details,
+ "summaries": cycle_summaries,
+ "stats": cycle_stats,
+ },
+ "bfs_waves": bfs_result,
+ "edge_stats": {
+ "total_edges": edge_count,
+ "fetch_time_ms": fetch_ms,
+ "total_time_ms": t_total,
+ },
+ }
+
+ metadata = {
+ "tool_name": tool_name or "graph_analyseDatabase",
+ "container_pattern": container_pattern,
+ "exclude_objects": exclude_objects,
+ "top_n_roots": top_n_roots,
+ "max_depth_down": max_depth_down,
+ "max_depth_up": max_depth_up,
+ "edge_repository": edge_repository,
+ "timing": {
+ "edge_fetch_ms": fetch_ms,
+ "root_objects_ms": round((t_roots - t_fetch) * 1000),
+ "components_ms": round((t_comps - t_roots) * 1000),
+ "cycles_ms": round((t_cycles - t_comps) * 1000),
+ "bfs_ms": round((t_bfs - t_cycles) * 1000),
+ "total_ms": t_total,
+ },
+ "counts": {
+ "edges": edge_count,
+ "root_objects": len(root_objects),
+ "components": len(raw_comps),
+ "cycles": len(unique_cycles),
+ "bfs_nodes": len(bfs_result["nodes"]),
+ },
+ "status": "success",
+ "message": (
+ f"Composite analysis complete: {len(root_objects)} roots, "
+ f"{len(raw_comps)} components, {len(unique_cycles)} cycles, "
+ f"{len(bfs_result['nodes'])} BFS nodes. "
+ f"Total: {t_total}ms (1 SQL fetch: {fetch_ms}ms)."
+ ),
+ }
+
+ logger.info(
+ "Tool: handle_graph_analyseDatabase: Complete in %dms — %d roots, %d components, %d cycles, %d BFS nodes",
+ t_total,
+ len(root_objects),
+ len(raw_comps),
+ len(unique_cycles),
+ len(bfs_result["nodes"]),
+ )
+
+ return create_response(response_data, metadata)
+
+ except Exception as e:
+ logger.error("Tool: handle_graph_analyseDatabase: Error: %s", e, exc_info=True)
+ return create_response(
+ {"error": str(e)},
+ {
+ "tool_name": tool_name or "graph_analyseDatabase",
+ "container_pattern": container_pattern,
+ "status": "error",
+ },
+ )
+
+
+# ═══════════════════════════════════════════════════════════════════
+# Tool registration descriptor
+# ═══════════════════════════════════════════════════════════════════
+
+GRAPH_ANALYSE_DATABASE_TOOL = {
+ "name": "graph_analyseDatabase",
+ "handler": handle_graph_analyseDatabase,
+ "description": (
+ "Composite graph analysis — runs root object discovery, connected "
+ "component analysis, cycle detection, and BFS deployment wave "
+ "planning in a SINGLE MCP call with one shared edge fetch. "
+ "Use this instead of calling graph_findRootObjects, "
+ "graph_connectedComponents, graph_detectCycles, and "
+ "graph_bfsLevels individually when you need two or more of "
+ "these analyses. Returns all four result sets in one response. "
+ "Dramatically faster than sequential calls due to shared edge "
+ "fetch (1 SQL round-trip instead of 4) and single MCP response. "
+ "Requires an edge repository conforming to the Graph Edge Contract. "
+ "If you don't have one yet, call graph_edgeContractDDL first to "
+ "generate the CREATE TABLE or CREATE VIEW DDL."
+ ),
+ "parameters": {
+ "container_pattern": {
+ "type": "string",
+ "description": (
+ "CSV LIKE patterns for databases/schemas to analyse. "
+ "Supports wildcards: '%SALES%' or '%SALES%,%FINANCE%'."
+ ),
+ "required": True,
+ },
+ "exclude_objects": {
+ "type": "string",
+ "description": ("CSV LIKE patterns to exclude. Example: 'SANDBOX%,%.temp_%'. Default: ''."),
+ "default": "",
+ },
+ "top_n_roots": {
+ "type": "integer",
+ "description": (
+ "Number of top root objects (by downstream impact) to include in BFS wave analysis. Default: 4."
+ ),
+ "default": 4,
+ },
+ "max_depth_down": {
+ "type": "integer",
+ "description": ("Maximum downstream BFS hops from roots. Default: 10."),
+ "default": 10,
+ },
+ "max_depth_up": {
+ "type": "integer",
+ "description": ("Maximum upstream BFS hops. 0 = skip upstream. Default: 0."),
+ "default": 0,
+ },
+ "edge_repository": {
+ "type": "string",
+ "description": (
+ "Edge repository table or view conforming to the Graph Edge Contract. "
+ "Call graph_edgeContractDDL to generate one if needed. "
+ "Required parameter — no default."
+ ),
+ "required": True,
+ },
+ },
+}
diff --git a/src/teradata_mcp_server/tools/graph/graph_bfs_levels.py b/src/teradata_mcp_server/tools/graph/graph_bfs_levels.py
new file mode 100644
index 0000000..61f2b21
--- /dev/null
+++ b/src/teradata_mcp_server/tools/graph/graph_bfs_levels.py
@@ -0,0 +1,886 @@
+"""
+graph_bfs_levels.py — Pure-Python BFS implementation for graph dependency analysis.
+
+This module provides handle_graph_bfsLevels, a pure-Python BFS
+implementation that executes entirely in the MCP server process.
+
+Key design points:
+ - One SQL round-trip to Teradata (edge fetch), then all BFS runs in Python.
+ - Standard queue-based BFS (O(V+E)) rather than iterative SQL relaxation.
+ - No stored procedure dependency — no volatile tables, no Teradata DDL objects.
+ - All include_containers, exclude_objects, and depth-cap filtering applied
+ in Python before BFS starts.
+ - Output schema: node fields, direction values, nearest_root,
+ cycle_candidates, summary — fully compatible with the MCP tool
+ descriptor, tool registration, and all callers.
+
+Edge direction convention (critical — matches the corrected SP):
+ Edge Repository edge: Src "referenced by" Tgt
+ => Src is the DEPENDENCY (upstream of Tgt)
+ => Tgt is the DEPENDENT (downstream of Src)
+
+ Upstream BFS (finds what a node DEPENDS ON):
+ Traverse edges in the Tgt→Src direction (against the stored edge).
+ Starting from settled Tgt-side nodes, discover Src-side ancestors.
+ node_i = Src_Object_Name_FQ (upstream candidate being discovered)
+ node_j = Tgt_Object_Name_FQ (already-settled downstream neighbour)
+
+ Downstream BFS (finds what DEPENDS ON a node):
+ Traverse edges in the Src→Tgt direction (along the stored edge).
+ Starting from settled Src-side nodes, discover Tgt-side consumers.
+ node_i = Tgt_Object_Name_FQ (downstream candidate being discovered)
+ node_j = Src_Object_Name_FQ (already-settled upstream neighbour)
+
+Author: Paul Dancer — Teradata Global Field Tech
+"""
+
+import fnmatch
+import logging
+from collections import defaultdict, deque
+
+from teradatasql import TeradataConnection
+
+from teradata_mcp_server.tools.graph._graph_utils import (
+ bfs_safe_int,
+ create_bfs_summary,
+ extract_cycle_candidates,
+ parse_csv_patterns,
+)
+from teradata_mcp_server.tools.utils import create_response, rows_to_json
+
+logger = logging.getLogger("teradata_mcp_server")
+
+
+# ---------------------------------------------------------------------------
+# Public handler
+# ---------------------------------------------------------------------------
+
+
+def handle_graph_bfsLevels(
+ conn: TeradataConnection,
+ root_node_list: str,
+ max_depth_up: int = 10,
+ max_depth_down: int = 10,
+ exclude_objects: str = "",
+ include_containers: str = "",
+ edge_repository: str = "",
+ tool_name: str | None = None,
+ *args,
+ **kwargs,
+):
+ """
+ Compute BFS shortest-path hop distances from one or more root nodes.
+
+ Pure-Python implementation — no stored procedure required.
+
+ WHEN TO USE THIS TOOL vs graph_traceLineage:
+ -------------------------------------------------------
+ Use graph_bfsLevels when asked to:
+ - Sequence objects for deployment or migration (ORDER BY downstream_level
+ gives correct topological deployment order for root objects)
+ - Group objects into migration waves (nearest_root identifies which of
+ the input root tables each object belongs to)
+ - Find which migration root table each object is closest to across a
+ multi-root migration scope
+ - Identify cycle members by depth (direction='BOTH' nodes with unequal
+ absolute upstream/downstream levels are cycle candidates)
+ - Count objects within N hops of a change (blast-radius sizing)
+ - Answer "how far is object X from the migration root tables?"
+
+ Do NOT use graph_bfsLevels for general lineage tracing, impact path
+ analysis, or questions about which specific objects depend on which.
+ Use graph_traceLineage for those — it returns the full edge
+ set with relationship detail. graph_bfsLevels returns distances and
+ wave groupings, not dependency paths or edge detail.
+
+ KEY DISTINCTION — root_node_list accepts EXACT FQ names only (no
+ wildcards). Use graph_findRootObjects first to identify the seed
+ objects, then pass their exact FQ names here.
+
+ Arguments:
+ root_node_list - str: CSV of exact fully-qualified root node names.
+ No wildcards — exact names only.
+
+ SINGLE ROOT:
+ 'DEV01_StGeo_STD_T.mortgage_account'
+
+ MULTIPLE ROOTS (CSV):
+ 'DEV01_StGeo_STD_T.mortgage_account,
+ DEV01_StGeo_STD_T.mortgage_borrower,
+ DEV01_StGeo_STD_T.mortgage_property'
+
+ CRITICAL: Exact FQ names, no wildcards.
+ Use graph_findRootObjects or
+ graph_traceLineage first to discover names.
+
+ max_depth_up - int: Maximum upstream hops to traverse.
+ 0 = skip upstream analysis entirely.
+ Default: 10
+
+ Upstream means "what this object DEPENDS ON" —
+ its sources, prerequisites, and ancestors.
+ For root objects with in-degree zero, upstream_level
+ will be NULL for all non-root nodes (correct).
+
+ max_depth_down - int: Maximum downstream hops to traverse.
+ 0 = skip downstream analysis entirely.
+ Default: 10
+
+ Downstream means "what DEPENDS ON this object" —
+ its consumers, dependents, and impact radius.
+ For root objects with in-degree zero, downstream_level
+ will show positive values for all consumers (correct).
+
+ exclude_objects - str: CSV of FQ object name LIKE patterns to exclude.
+ Matched against both Src and Tgt sides of every edge.
+ Python fnmatch is used for pattern matching (% → *).
+ Example: 'DFJ%,C_D02%,%.temp_%'
+ Default: '' (no exclusions)
+
+ include_containers - str: CSV of container name LIKE patterns to include.
+ Only edges where BOTH Src and Tgt containers match
+ at least one pattern are traversed.
+ Python fnmatch used for matching (% → *).
+ Empty = all containers included.
+ Example: 'DEV01_StGeo%,MF_STGEO%,TABLEAU%,POWERBI%'
+ Default: '' (all containers)
+
+ edge_repository - str: Edge repository view/table conforming to the
+ Graph Edge Contract. Required parameter — no default.
+
+ Returns:
+ ResponseType: formatted response with BFS node results + metadata.
+ Schema is identical to the earlier SP-based graph_bfsLevels tool.
+
+ Response structure:
+ {
+ "nodes": [
+ {
+ "node": "DEV01_StGeo_STD_T.mortgage_account",
+ "container_name": "DEV01_StGeo_STD_T",
+ "object_name": "mortgage_account",
+ "object_kind": "Table",
+ "upstream_level": 0, // 0 for root, negative for ancestors, None if unreachable or skipped
+ "downstream_level": 0, // 0 for root, positive for consumers
+ "nearest_root": "DEV01_StGeo_STD_T.mortgage_account",
+ "direction": "ROOT", // ROOT / U / D / BOTH
+ "is_root": "Y"
+ },
+ ...
+ ],
+ "cycle_candidates": [...], // direction='BOTH' nodes with unequal
+ // absolute upstream/downstream levels
+ "summary": {
+ "total_nodes": 46,
+ "root_nodes": 3,
+ "upstream_only": 12,
+ "downstream_only": 28,
+ "both_directions": 3,
+ "cycle_candidates": 1,
+ "max_upstream_depth": 4,
+ "max_downstream_depth": 5,
+ "nodes_per_nearest_root": {"DB.Root1": 20, "DB.Root2": 26},
+ "object_kind_counts": {"Table": 10, "View": 22, "Macro": 8, ...}
+ }
+ }
+
+ direction values:
+ ROOT - One of the input root nodes
+ U - Reachable upstream only (negative upstream_level)
+ D - Reachable downstream only (positive downstream_level)
+ BOTH - Reachable in both directions — possible cycle member.
+ Unequal absolute levels indicate a back-edge (cycle).
+ Equal absolute levels indicate a shared dependency.
+
+ Technical Implementation Notes:
+ - One SQL round-trip to fetch all edges matching the container/exclusion
+ filters. All BFS computation is then done in Python memory.
+ - Standard queue-based BFS (O(V+E)) — optimal for unweighted graphs.
+ This is more correct than the original Bellman-Ford style SQL
+ relaxation loop that the SP inherited from the notebook.
+ - Multi-source BFS: all root nodes are seeded simultaneously at level 0.
+ Each non-root node settles at the distance to its nearest root, with
+ ties broken deterministically by lexicographic root name order.
+ - Upstream BFS walks rev_adj (Tgt → {Src}) to discover Src-side ancestors.
+ - Downstream BFS walks fwd_adj (Src → {Tgt}) to discover Tgt-side consumers.
+ - This direction convention matches the corrected SP (Option B fix):
+ upstream_level = NULL for non-root nodes when roots have in-degree zero (correct)
+ downstream_level = positive for all consumers (correct)
+ - Filter application order:
+ 1. SQL WHERE clause: fetch only edges matching include_containers
+ (both Src and Tgt containers must match at least one pattern)
+ 2. Python post-filter: exclude edges where either endpoint matches
+ an exclude_objects pattern (applied before building adjacency)
+ 3. BFS depth cap: enforced during queue processing
+ - Node metadata (container_name, object_name, object_kind) is derived
+ from the edge set and stored in a node registry during the fetch phase.
+ """
+ logger.debug(
+ "Tool: handle_graph_bfsLevels: Args: root_node_list=%s, "
+ "max_depth_up=%s, max_depth_down=%s, exclude_objects=%s, "
+ "include_containers=%s, edge_repository=%s",
+ root_node_list,
+ max_depth_up,
+ max_depth_down,
+ exclude_objects,
+ include_containers,
+ edge_repository,
+ )
+
+ if not edge_repository:
+ return create_response(
+ {"error": "edge_repository is required. Call graph_edgeContractDDL to generate one."},
+ {
+ "tool_name": tool_name or "graph_bfsLevels",
+ "status": "error",
+ },
+ )
+
+ # Clamp depth parameters to safe range
+ max_depth_up = max(0, min(10, int(max_depth_up)))
+ max_depth_down = max(0, min(10, int(max_depth_down)))
+
+ _tn = tool_name if tool_name else "graph_bfsLevels"
+
+ try:
+ # ------------------------------------------------------------------
+ # Step 1 — Parse root node list
+ # ------------------------------------------------------------------
+ roots: list[str] = parse_csv_patterns(root_node_list)
+
+ if not roots:
+ raise ValueError(f"root_node_list is empty or could not be parsed: '{root_node_list}'")
+
+ logger.debug(f"Tool: handle_graph_bfsLevels: Parsed {len(roots)} root node(s): {roots}")
+
+ # ------------------------------------------------------------------
+ # Step 2 — Parse filter patterns for Python-side matching
+ # ------------------------------------------------------------------
+ excl_patterns = parse_csv_patterns(exclude_objects) # may be empty
+ incl_patterns = parse_csv_patterns(include_containers) # may be empty
+
+ # ------------------------------------------------------------------
+ # Step 3 — Fetch edge set from Teradata (one round-trip)
+ #
+ # include_containers filter is applied in SQL (WHERE clause) for
+ # efficiency — avoids fetching edges that will be discarded.
+ # exclude_objects filter is applied in Python (more flexible LIKE
+ # patterns that are awkward to push into a single SQL predicate).
+ #
+ # Column selection:
+ # Src_Object_Name_FQ — fully-qualified source (dependency/upstream)
+ # Tgt_Object_Name_FQ — fully-qualified target (dependent/downstream)
+ # Src_Container_Name — database of source (for node registry)
+ # Src_Object_Name — short name of source (for node registry)
+ # Src_Kind — object type of source
+ # Tgt_Container_Name — database of target
+ # Tgt_Object_Name — short name of target
+ # Tgt_Kind — object type of target
+ # ------------------------------------------------------------------
+ fetch_sql = _build_fetch_sql(
+ edge_repository=edge_repository,
+ incl_patterns=incl_patterns,
+ )
+
+ logger.debug(f"Tool: handle_graph_bfsLevels: Fetching edges: {fetch_sql}")
+
+ with conn.cursor() as cur:
+ cur.execute(fetch_sql)
+ raw_rows = cur.fetchall()
+ col_names = [d[0].lower() for d in cur.description]
+
+ logger.debug(f"Tool: handle_graph_bfsLevels: Fetched {len(raw_rows)} raw edge rows")
+
+ # ------------------------------------------------------------------
+ # Step 4 — Build in-memory graph structures
+ #
+ # node_registry: node_fq → {container_name, object_name, object_kind}
+ # fwd_adj: Src → {Tgt} (Src referenced by Tgt; Src is the dependency)
+ # rev_adj: Tgt → {Src} (reverse: Tgt depends on Src)
+ #
+ # rev_adj is used by the UPSTREAM BFS to discover Src-side ancestors
+ # starting from settled Tgt-side neighbours.
+ #
+ # fwd_adj is used by the DOWNSTREAM BFS to discover Tgt-side consumers
+ # starting from settled Src-side neighbours.
+ #
+ # Exclude-objects filtering is applied here: any edge where either
+ # endpoint FQ name matches a pattern in excl_patterns is dropped.
+ # ------------------------------------------------------------------
+ node_registry: dict[str, dict] = {}
+ fwd_adj: dict[str, set[str]] = defaultdict(set) # Src → {Tgt}
+ rev_adj: dict[str, set[str]] = defaultdict(set) # Tgt → {Src}
+
+ col_idx = {name: i for i, name in enumerate(col_names)}
+
+ edges_total = 0
+ edges_excluded = 0
+
+ for row in raw_rows:
+ src_fq = _val(row, col_idx, "src_object_name_fq")
+ tgt_fq = _val(row, col_idx, "tgt_object_name_fq")
+ src_db = _val(row, col_idx, "src_container_name")
+ src_nm = _val(row, col_idx, "src_object_name")
+ src_knd = _val(row, col_idx, "src_kind")
+ tgt_db = _val(row, col_idx, "tgt_container_name")
+ tgt_nm = _val(row, col_idx, "tgt_object_name")
+ tgt_knd = _val(row, col_idx, "tgt_kind")
+
+ if not src_fq or not tgt_fq:
+ continue
+
+ edges_total += 1
+
+ # Apply exclude_objects filter — both endpoints checked
+ if excl_patterns and (_matches_any(src_fq, excl_patterns) or _matches_any(tgt_fq, excl_patterns)):
+ edges_excluded += 1
+ continue
+
+ # Register both nodes in the registry
+ if src_fq not in node_registry:
+ node_registry[src_fq] = {
+ "container_name": src_db or "",
+ "object_name": src_nm or src_fq.split(".")[-1],
+ "object_kind": src_knd or "",
+ }
+ if tgt_fq not in node_registry:
+ node_registry[tgt_fq] = {
+ "container_name": tgt_db or "",
+ "object_name": tgt_nm or tgt_fq.split(".")[-1],
+ "object_kind": tgt_knd or "",
+ }
+
+ # Build forward and reverse adjacency
+ fwd_adj[src_fq].add(tgt_fq) # Src → Tgt
+ rev_adj[tgt_fq].add(src_fq) # Tgt → Src
+
+ logger.debug(
+ f"Tool: handle_graph_bfsLevels: "
+ f"Graph built — {len(node_registry)} unique nodes, "
+ f"{edges_total} raw edges, {edges_excluded} excluded. "
+ f"|fwd_adj|={len(fwd_adj)}, |rev_adj|={len(rev_adj)}"
+ )
+
+ # Ensure root nodes are registered even if they have no edges
+ # (isolated roots are valid — they appear only as ROOT in output)
+ for r in roots:
+ if r not in node_registry:
+ parts = r.split(".", 1)
+ node_registry[r] = {
+ "container_name": parts[0] if len(parts) > 1 else "",
+ "object_name": parts[1] if len(parts) > 1 else r,
+ "object_kind": "",
+ }
+
+ # ------------------------------------------------------------------
+ # Step 5 — Multi-source BFS: UPSTREAM pass
+ #
+ # "Upstream" = what a node DEPENDS ON (its sources, ancestors).
+ #
+ # Edge Repository: Src "referenced by" Tgt ⟹ Src is the dependency.
+ #
+ # Algorithm:
+ # Seed all root nodes at level 0.
+ # For each settled Tgt-side node (neighbour), look up its Src-side
+ # nodes via rev_adj (Tgt → {Src}).
+ # Each reachable Src node is upstream of the root.
+ #
+ # Why rev_adj?
+ # rev_adj[tgt] = {all Src nodes that Tgt depends on}
+ # Walking rev_adj from a settled node discovers its dependencies —
+ # which is exactly "upstream" in data lineage terms.
+ #
+ # For root objects with in-degree zero (no rev_adj entry), no Src
+ # nodes exist, so upstream_level remains None for all non-root nodes.
+ # This is correct behaviour.
+ # ------------------------------------------------------------------
+ up_level: dict[str, int] = {} # node_fq → hop count (0..N)
+ up_root: dict[str, str] = {} # node_fq → nearest root
+
+ if max_depth_up > 0:
+ up_level, up_root = _bfs_multisource(
+ roots=roots,
+ adj=rev_adj, # Tgt → {Src}: walk upstream
+ max_depth=max_depth_up,
+ label="upstream",
+ )
+ logger.debug(
+ f"Tool: handle_graph_bfsLevels: Upstream BFS settled {len(up_level)} nodes (max_depth={max_depth_up})"
+ )
+ else:
+ logger.debug("Tool: handle_graph_bfsLevels: Upstream BFS skipped (max_depth_up=0)")
+
+ # ------------------------------------------------------------------
+ # Step 6 — Multi-source BFS: DOWNSTREAM pass
+ #
+ # "Downstream" = what DEPENDS ON a node (its consumers, dependents).
+ #
+ # Edge Repository: Src "referenced by" Tgt ⟹ Tgt is the dependent.
+ #
+ # Algorithm:
+ # Seed all root nodes at level 0.
+ # For each settled Src-side node (neighbour), look up its Tgt-side
+ # nodes via fwd_adj (Src → {Tgt}).
+ # Each reachable Tgt node is downstream of the root.
+ #
+ # Why fwd_adj?
+ # fwd_adj[src] = {all Tgt nodes that reference Src}
+ # Walking fwd_adj from a settled node discovers its consumers —
+ # which is exactly "downstream" in data lineage terms.
+ #
+ # For root objects with in-degree zero, all their Tgt-side consumers
+ # are reachable via fwd_adj, so downstream_level correctly shows
+ # positive values for views, macros, reports, etc.
+ # ------------------------------------------------------------------
+ dn_level: dict[str, int] = {}
+ dn_root: dict[str, str] = {}
+
+ if max_depth_down > 0:
+ dn_level, dn_root = _bfs_multisource(
+ roots=roots,
+ adj=fwd_adj, # Src → {Tgt}: walk downstream
+ max_depth=max_depth_down,
+ label="downstream",
+ )
+ logger.debug(
+ f"Tool: handle_graph_bfsLevels: "
+ f"Downstream BFS settled {len(dn_level)} nodes "
+ f"(max_depth={max_depth_down})"
+ )
+ else:
+ logger.debug("Tool: handle_graph_bfsLevels: Downstream BFS skipped (max_depth_down=0)")
+
+ # ------------------------------------------------------------------
+ # Step 7 — Assemble result rows
+ #
+ # One row per reachable node (including roots themselves).
+ # Schema matches SP output exactly so callers need no changes.
+ #
+ # Rules:
+ # upstream_level : negative integer (-(hop_count)), None if unreachable
+ # downstream_level : positive integer (+hop_count), None if unreachable
+ # Root node : upstream_level=0, downstream_level=0 always
+ # direction : ROOT / U / D / BOTH
+ # nearest_root : upstream root takes precedence over downstream root
+ # is_root : 'Y' if node is in the root set, 'N' otherwise
+ # ------------------------------------------------------------------
+ root_set = set(roots)
+
+ # Union of all settled nodes (roots + BFS-reachable)
+ all_nodes: set[str] = root_set.copy()
+ all_nodes.update(up_level.keys())
+ all_nodes.update(dn_level.keys())
+
+ result_nodes: list[dict] = []
+
+ for node_fq in sorted(all_nodes):
+ meta = node_registry.get(node_fq, {})
+ is_root_node = node_fq in root_set
+
+ upstream_level: int | None
+ downstream_level: int | None
+ nearest_root_val: str | None
+ direction: str | None
+
+ if is_root_node:
+ upstream_level = 0
+ downstream_level = 0
+ nearest_root_val = node_fq
+ direction = "ROOT"
+ else:
+ raw_up = up_level.get(node_fq)
+ raw_dn = dn_level.get(node_fq)
+
+ # upstream_level: negative (opposite sign to hop count)
+ upstream_level = (-(raw_up)) if raw_up is not None else None
+ # downstream_level: positive (same sign as hop count)
+ downstream_level = raw_dn if raw_dn is not None else None
+
+ # nearest_root: upstream root takes precedence whenever one exists (matches SP behaviour)
+ nearest_root_val = up_root.get(node_fq) or dn_root.get(node_fq)
+
+ if raw_up is not None and raw_dn is not None:
+ direction = "BOTH"
+ elif raw_up is not None:
+ direction = "U"
+ elif raw_dn is not None:
+ direction = "D"
+ else:
+ direction = None # Should not occur — node is in all_nodes
+
+ result_nodes.append(
+ {
+ "node": node_fq,
+ "container_name": meta.get("container_name", ""),
+ "object_name": meta.get("object_name", ""),
+ "object_kind": meta.get("object_kind", ""),
+ "upstream_level": upstream_level,
+ "downstream_level": downstream_level,
+ "nearest_root": nearest_root_val,
+ "direction": direction,
+ "is_root": "Y" if is_root_node else "N",
+ }
+ )
+
+ logger.debug(f"Tool: handle_graph_bfsLevels: Assembled {len(result_nodes)} result nodes")
+
+ # ------------------------------------------------------------------
+ # Step 8 — Build summary and extract cycle candidates
+ # (re-uses existing private helpers from the SP-based tool)
+ # ------------------------------------------------------------------
+ cycle_cands = extract_cycle_candidates(result_nodes)
+ summary = create_bfs_summary(result_nodes, cycle_cands)
+
+ # ------------------------------------------------------------------
+ # Step 9 — Assemble response (identical schema to SP-based tool)
+ # ------------------------------------------------------------------
+ response_data = {
+ "nodes": result_nodes,
+ "cycle_candidates": cycle_cands,
+ "summary": summary,
+ }
+
+ metadata = {
+ "tool_name": _tn,
+ "root_node_list": root_node_list,
+ "max_depth_up": max_depth_up,
+ "max_depth_down": max_depth_down,
+ "exclude_objects": exclude_objects,
+ "include_containers": include_containers,
+ "edge_repository": edge_repository,
+ "implementation": "python_bfs", # distinguishes from SP-based tool
+ "graph_stats": {
+ "unique_nodes_in_graph": len(node_registry),
+ "raw_edges_fetched": edges_total,
+ "edges_excluded": edges_excluded,
+ "edges_traversed": edges_total - edges_excluded,
+ },
+ "counts": summary,
+ "status": "success",
+ "rtn_code": 0,
+ "message": (f"Module=graph_bfsLevels;RootCount={len(roots)};TotalNodes={len(result_nodes)};Success;"),
+ }
+
+ logger.debug(f"Tool: handle_graph_bfsLevels: metadata: {metadata}")
+ return create_response(response_data, metadata)
+
+ except Exception as e:
+ logger.error(f"Tool: handle_graph_bfsLevels: Error: {e}", exc_info=True)
+ return create_response(
+ {"error": str(e)},
+ {
+ "tool_name": _tn,
+ "root_node_list": root_node_list,
+ "status": "error",
+ },
+ )
+
+
+# ---------------------------------------------------------------------------
+# Private helpers
+# ---------------------------------------------------------------------------
+# parse_csv_patterns is imported from _graph_utils.
+
+
+def _matches_any(fq_name: str, patterns: list[str]) -> bool:
+ """
+ Return True if fq_name matches any pattern in patterns.
+
+ Converts SQL LIKE wildcards (%) to fnmatch wildcards (*) before matching.
+ Case-insensitive to match Teradata NOT CASESPECIFIC behaviour.
+
+ Arguments:
+ fq_name - Fully-qualified object name (e.g. 'MyDB.MyTable')
+ patterns - List of LIKE-style patterns (e.g. ['DFJ%', '%.temp_%'])
+
+ Returns:
+ True if any pattern matches, False otherwise
+ """
+ name_lower = fq_name.lower()
+ for pat in patterns:
+ # Convert SQL LIKE % to fnmatch *
+ fn_pat = pat.replace("%", "*").lower()
+ if fnmatch.fnmatch(name_lower, fn_pat):
+ return True
+ return False
+
+
+def _matches_container_any(container: str, patterns: list[str]) -> bool:
+ """
+ Return True if the container name matches any of the given patterns.
+
+ Used to validate include_containers filter against container names.
+ Converts SQL LIKE % to fnmatch * for matching.
+
+ Arguments:
+ container - Database/container name (e.g. 'DEV01_StGeo_STD_T')
+ patterns - List of LIKE-style container patterns
+
+ Returns:
+ True if any pattern matches, False otherwise
+ """
+ if not patterns:
+ return True # No whitelist = all containers included
+ name_lower = container.lower()
+ for pat in patterns:
+ fn_pat = pat.replace("%", "*").lower()
+ if fnmatch.fnmatch(name_lower, fn_pat):
+ return True
+ return False
+
+
+def _build_fetch_sql(
+ edge_repository: str,
+ incl_patterns: list[str],
+) -> str:
+ """
+ Build the SQL query to fetch edges from the edge repository.
+
+ include_containers is pushed into the WHERE clause for efficiency.
+ exclude_objects is applied in Python after fetching.
+
+ Edge repository column usage:
+ Src_Object_Name_FQ — fully-qualified dependency (upstream)
+ Tgt_Object_Name_FQ — fully-qualified dependent (downstream)
+
+ Arguments:
+ edge_repository - Fully-qualified view/table name
+ incl_patterns - Parsed list of container LIKE patterns (may be empty)
+
+ Returns:
+ SQL string ready for cursor.execute()
+ """
+ base_sql = f"""
+LOCKING ROW FOR ACCESS
+SELECT
+ TRIM(r.Src_Object_Name_FQ) AS Src_Object_Name_FQ
+ ,TRIM(r.Tgt_Object_Name_FQ) AS Tgt_Object_Name_FQ
+ ,TRIM(r.Src_Container_Name) AS Src_Container_Name
+ ,TRIM(r.Src_Object_Name) AS Src_Object_Name
+ ,TRIM(r.Src_Kind) AS Src_Kind
+ ,TRIM(r.Tgt_Container_Name) AS Tgt_Container_Name
+ ,TRIM(r.Tgt_Object_Name) AS Tgt_Object_Name
+ ,TRIM(r.Tgt_Kind) AS Tgt_Kind
+FROM {edge_repository} r
+WHERE r.Src_Object_Name_FQ IS NOT NULL
+AND TRIM(r.Src_Object_Name_FQ) <> ''
+AND r.Tgt_Object_Name_FQ IS NOT NULL
+AND TRIM(r.Tgt_Object_Name_FQ) <> ''"""
+
+ if incl_patterns:
+ # Build OR-expanded WHERE clause for container inclusion.
+ # Applies to BOTH Src and Tgt containers — an edge is included only
+ # if both endpoints are within the whitelisted container set.
+ src_clauses = " OR ".join(f"TRIM(r.Src_Container_Name) LIKE '{p}'" for p in incl_patterns)
+ tgt_clauses = " OR ".join(f"TRIM(r.Tgt_Container_Name) LIKE '{p}'" for p in incl_patterns)
+ base_sql += f"\nAND ({src_clauses})"
+ base_sql += f"\nAND ({tgt_clauses})"
+
+ return base_sql + ";"
+
+
+def _val(row, col_idx: dict, col_name: str) -> str | None:
+ """
+ Safely extract a value from a result row by column name.
+
+ Arguments:
+ row - Tuple of row values from cursor.fetchall()
+ col_idx - Dict mapping lowercase column name → position index
+ col_name - Column name to look up (lowercase)
+
+ Returns:
+ Stripped string value, or None if missing/null
+ """
+ idx = col_idx.get(col_name)
+ if idx is None:
+ return None
+ val = row[idx]
+ if val is None:
+ return None
+ return str(val).strip()
+
+
+def _bfs_multisource(
+ roots: list[str],
+ adj: dict[str, set[str]],
+ max_depth: int,
+ label: str,
+) -> tuple[dict[str, int], dict[str, str]]:
+ """
+ Standard queue-based multi-source BFS from a set of root nodes.
+
+ All roots are seeded simultaneously at level 0 (multi-source BFS).
+ Each reachable node settles at the hop count to its nearest root.
+ Ties are broken deterministically: the lexicographically smallest
+ root name wins (consistent with MIN(nearest_root) in the SP).
+
+ Importantly, root nodes themselves are NOT added to the level/root
+ dicts returned — they are handled separately in the caller as
+ direction='ROOT'. This prevents roots from appearing twice in output.
+
+ Arguments:
+ roots - List of exact root node FQ names
+ adj - Adjacency dict: node → {reachable neighbours}
+ For upstream BFS: rev_adj (Tgt → {Src})
+ For downstream BFS: fwd_adj (Src → {Tgt})
+ max_depth - Maximum hops to traverse from any root
+ label - 'upstream' or 'downstream' (used for logging only)
+
+ Returns:
+ Tuple of:
+ level_map - Dict: node_fq → hop_count (1..max_depth)
+ Root nodes are NOT included (handled separately).
+ root_map - Dict: node_fq → nearest_root FQ name
+ """
+ level_map: dict[str, int] = {}
+ root_map: dict[str, str] = {}
+
+ # Seed: all root nodes at level 0.
+ # Visited set initialised with roots so they are never re-settled
+ # by BFS propagation from other roots.
+ visited: set[str] = set(roots)
+
+ # Queue entries: (node_fq, nearest_root_fq, current_depth)
+ queue: deque[tuple[str, str, int]] = deque()
+
+ for r in sorted(roots): # sorted → lexicographic tie-breaking
+ queue.append((r, r, 0))
+
+ while queue:
+ node, nearest_root, depth = queue.popleft()
+
+ if depth >= max_depth:
+ # At depth cap — do not propagate further from this node
+ continue
+
+ # Traverse neighbours from the adjacency dict
+ for neighbour in sorted(adj.get(node, [])): # sorted → determinism
+ if neighbour in visited:
+ continue
+
+ visited.add(neighbour)
+ new_depth = depth + 1
+ level_map[neighbour] = new_depth
+ root_map[neighbour] = nearest_root
+ queue.append((neighbour, nearest_root, new_depth))
+
+ logger.debug(f"_bfs_multisource [{label}]: settled {len(level_map)} non-root nodes")
+ return level_map, root_map
+
+
+# bfs_safe_int — imported from _graph_utils
+
+
+# create_bfs_summary — imported from _graph_utils
+
+
+# extract_cycle_candidates — imported from _graph_utils
+
+
+# ---------------------------------------------------------------------------
+# Tool registration descriptor
+#
+# Register alongside the other GRAPH_*_TOOL descriptors in graph_tools.py.
+# ---------------------------------------------------------------------------
+GRAPH_BFS_LEVELS_TOOL = {
+ # Tool name matches the MCP protocol
+ # interface and all existing agent prompts.
+ "name": "graph_bfsLevels",
+ "handler": handle_graph_bfsLevels,
+ "description": (
+ "Compute BFS shortest-path hop distances from one or more root nodes "
+ "in the dependency graph. Pure-Python implementation — no stored "
+ "procedure required. One SQL round-trip to fetch edges, then all BFS "
+ "computation runs in the MCP server process. "
+ ""
+ "Returns one row per reachable node with: upstream_level (None for root "
+ "objects with in-degree zero, negative for upstream ancestors), "
+ "downstream_level (0 for roots, positive for consumers), nearest_root "
+ "(which of the input root nodes this object is closest to), direction "
+ "(ROOT/U/D/BOTH), and is_root flag. Output schema is identical to the "
+ "SP-based graph_bfsLevels tool. "
+ ""
+ "USE THIS TOOL — not graph_traceLineage — when asked to: "
+ "sequence objects for deployment or migration (ORDER BY downstream_level "
+ "gives correct topological deployment order for objects downstream of "
+ "root tables); group objects into migration waves (nearest_root groups "
+ "each object under its closest root table); find which migration root "
+ "table each object belongs to across a multi-root migration scope; count "
+ "objects within N hops of a change for blast-radius sizing; identify "
+ "cycle members by depth (direction=BOTH nodes with unequal absolute "
+ "upstream/downstream levels are cycle candidates); or answer how far any "
+ "object is from the migration root tables. "
+ ""
+ "Do NOT use this tool for general lineage tracing, impact path analysis, "
+ "or questions about which specific objects depend on which — use "
+ "graph_traceLineage for those. graph_bfsLevels returns "
+ "distances and wave groupings, not dependency paths or edge detail. "
+ ""
+ "Requires an edge repository conforming to the Graph Edge Contract. "
+ "If you don't have one yet, call graph_edgeContractDDL first to "
+ "generate the CREATE TABLE or CREATE VIEW DDL. "
+ ""
+ "IMPORTANT: root_node_list accepts exact fully-qualified names only "
+ "(no wildcards). Use graph_findRootObjects first if needed."
+ ),
+ "parameters": {
+ "root_node_list": {
+ "type": "string",
+ "description": (
+ "CSV of exact fully-qualified root node names. No wildcards. "
+ "Single: 'MyDB.MyTable'. "
+ "Multiple: 'MyDB.TableA,MyDB.TableB,MyDB.TableC'."
+ ),
+ "required": True,
+ },
+ "max_depth_up": {
+ "type": "integer",
+ "description": (
+ "Maximum upstream hops to traverse. Upstream = what the node "
+ "depends on (its sources and ancestors). "
+ "0 = skip upstream entirely. Default: 10."
+ ),
+ "default": 10,
+ },
+ "max_depth_down": {
+ "type": "integer",
+ "description": (
+ "Maximum downstream hops to traverse. Downstream = what depends "
+ "on the node (its consumers and impact radius). "
+ "0 = skip downstream entirely. Default: 10."
+ ),
+ "default": 10,
+ },
+ "exclude_objects": {
+ "type": "string",
+ "description": (
+ "CSV of FQ object name LIKE patterns to exclude from traversal. "
+ "Matched against both Src and Tgt sides of every edge. "
+ "SQL LIKE wildcards (%) supported. "
+ "Example: 'DFJ%,C_D02%,%.temp_%'. Default: '' (no exclusions)."
+ ),
+ "default": "",
+ },
+ "include_containers": {
+ "type": "string",
+ "description": (
+ "CSV of container name LIKE patterns to include. "
+ "Only edges where BOTH Src and Tgt containers match at least "
+ "one pattern are fetched and traversed. "
+ "SQL LIKE wildcards (%) supported. "
+ "Example: 'DEV01_StGeo%,MF_STGEO%,TABLEAU%,POWERBI%'. "
+ "Default: '' (all containers)."
+ ),
+ "default": "",
+ },
+ "edge_repository": {
+ "type": "string",
+ "description": (
+ "Edge repository table or view conforming to the Graph Edge Contract. "
+ "Call graph_edgeContractDDL to generate one if needed. "
+ "Required parameter — no default."
+ ),
+ "required": True,
+ },
+ },
+}
diff --git a/src/teradata_mcp_server/tools/graph/graph_connected_components.py b/src/teradata_mcp_server/tools/graph/graph_connected_components.py
new file mode 100644
index 0000000..9e32184
--- /dev/null
+++ b/src/teradata_mcp_server/tools/graph/graph_connected_components.py
@@ -0,0 +1,479 @@
+"""
+graph_connected_components.py — Connected components analysis tool.
+
+Provides handle_graph_connectedComponents and GRAPH_CONNECTED_COMPONENTS_TOOL.
+
+Pure-Python implementation — no stored procedure required.
+
+Algorithm overview:
+ 1. Fetch all edges within the container scope in a single SQL SELECT.
+ 2. Run Union-Find (path-compressed) to assign every node to a component.
+ 3. Compute per-component summaries and overall statistics in Python.
+ 4. Assemble the same three-structure response the SP returned:
+ node_details — one row per node with Component_Id
+ component_summaries — one row per component with node count and list
+ summary_stats — single aggregate row
+
+Edge direction convention (matches Edge Repository / graph_bfsLevels):
+ Src_Object_Name is REFERENCED BY Tgt_Object_Name.
+ For WCC purposes edge direction is ignored — two nodes are in the same
+ component if there is any path (directed or undirected) between them.
+
+Author: Paul Dancer — Teradata Global Field Tech
+"""
+
+import logging
+from collections import defaultdict
+from typing import Any
+
+from teradatasql import TeradataConnection
+
+from teradata_mcp_server.tools.graph._graph_utils import (
+ build_like_or,
+ parse_csv_patterns,
+)
+from teradata_mcp_server.tools.utils import create_response
+
+logger = logging.getLogger("teradata_mcp_server")
+
+
+# ---------------------------------------------------------------------------
+# Internal helpers
+# ---------------------------------------------------------------------------
+# parse_csv_patterns and build_like_or are imported from _graph_utils.
+
+
+def _build_excl_clauses(patterns: list[str]) -> str:
+    """
+    Build a NOT (...) exclusion fragment for container/object patterns.
+
+    A pattern containing a dot is split on the first dot into a container
+    pattern and an object pattern; a plain pattern matches the container only.
+    Only the Src side is tested. Single quotes are doubled so a pattern cannot
+    terminate the SQL string literal it is embedded in.
+
+    Arguments:
+      patterns - List of exclusion LIKE patterns
+
+    Returns:
+      SQL fragment beginning with "AND NOT (...)" or empty string
+    """
+    if not patterns:
+        return ""
+    conditions = []
+    for p in (raw.replace("'", "''") for raw in patterns):
+        if "." in p:
+            db_part, obj_part = p.split(".", 1)
+            conditions.append(f"(Src_Container_Name LIKE '{db_part}' AND Src_Object_Name LIKE '{obj_part}')")
+        else:
+            conditions.append(f"Src_Container_Name LIKE '{p}'")
+    return "AND NOT (" + " OR ".join(conditions) + ")"
+
+
+# ---------------------------------------------------------------------------
+# Union-Find
+# ---------------------------------------------------------------------------
+
+
+class _UnionFind:
+    """
+    Disjoint-set forest keyed by node name, with path compression.
+
+    find() returns a set's representative; union() merges two sets.
+    """
+
+    def __init__(self):
+        self._parent: dict[str, str] = {}
+
+    def find(self, x: str) -> str:
+        """Return the representative of x's set, flattening the walked path."""
+        links = self._parent
+        links.setdefault(x, x)
+        root = x
+        while links[root] != root:  # climb to the self-parented root
+            root = links[root]
+        while (above := links[x]) != root:  # re-point each walked node at the root
+            links[x] = root
+            x = above
+        return root
+
+    def union(self, a, b) -> None:
+        """Join the set containing a with the set containing b."""
+        root_a, root_b = self.find(a), self.find(b)
+        if root_a == root_b:
+            return
+        self._parent[root_a] = root_b
+
+    def all_nodes(self) -> set:
+        """Return every node this structure has seen."""
+        return set(self._parent)
+
+    def component_map(self) -> dict[str, str]:
+        """Return {node: component_root} for every node seen so far."""
+        return {node: self.find(node) for node in self._parent}
+
+
+# ---------------------------------------------------------------------------
+# Response assembly helpers
+# ---------------------------------------------------------------------------
+
+
+def _build_node_details(
+    component_map: dict[str, str],
+    root_to_id: dict[str, int],
+    node_kind: dict[str, str],
+) -> list[dict]:
+    """
+    Produce the node_details result set: one dict per node.
+
+    Rows are ordered by fully-qualified node name.
+
+    Arguments:
+      component_map - {node_fq: component_root} from Union-Find
+      root_to_id    - {component_root: integer_id} mapping
+      node_kind     - {node_fq: object_kind}; absent nodes report "Unknown"
+
+    Returns:
+      List of node detail dicts
+    """
+
+    def _row(fq_name: str, root: str) -> dict:
+        # Split on the first dot only; a name without a dot has no database part
+        container, dot, remainder = fq_name.partition(".")
+        return {
+            "Node_FQ": fq_name,
+            "DatabaseName": container if dot else "",
+            "ObjectName": remainder if dot else fq_name,
+            "Component_Id": root_to_id[root],
+            "Object_Kind": node_kind.get(fq_name, "Unknown"),
+        }
+
+    return [_row(fq_name, root) for fq_name, root in sorted(component_map.items())]
+
+
+def _build_component_summaries(
+    component_map: dict[str, str],
+    root_to_id: dict[str, int],
+) -> list[dict]:
+    """
+    Produce the component_summaries result set: one dict per component.
+
+    Arguments:
+      component_map - {node_fq: component_root}
+      root_to_id    - {component_root: integer_id}
+
+    Returns:
+      List of component summary dicts ordered by Component_Id
+    """
+    # Bucket member names under their component root
+    members_by_root: dict[str, list[str]] = {}
+    for fq_name, root in component_map.items():
+        members_by_root.setdefault(root, []).append(fq_name)
+
+    summaries: list[dict[str, Any]] = []
+    for root, members in members_by_root.items():
+        members.sort()
+        summaries.append(
+            {
+                "Component_Id": root_to_id[root],
+                "Node_Count": len(members),
+                "Node_List": ", ".join(members),
+            }
+        )
+
+    # Each root contributes exactly one row, keyed by its integer ID
+    return sorted(summaries, key=lambda row: row["Component_Id"])
+
+
+def _build_summary_stats(
+    component_summaries: list[dict],
+    edge_count: int,
+) -> list[dict]:
+    """
+    Produce the summary_stats result set: a single aggregate dict.
+
+    Largest_Component and Smallest_Component are 0 when there are no components.
+
+    Arguments:
+      component_summaries - List of component summary dicts
+      edge_count          - Total edges loaded from the repository
+
+    Returns:
+      Single-element list
+    """
+    # Per-component sizes drive every aggregate below
+    sizes = [summary["Node_Count"] for summary in component_summaries]
+    total_components = len(sizes)
+    total_nodes = sum(sizes)
+
+    message = (
+        f"{total_components} connected component(s) identified across {total_nodes} node(s) and {edge_count} edge(s)."
+    )
+
+    stats = {
+        "Component_Count": total_components,
+        "Node_Count": total_nodes,
+        "Edge_Count": edge_count,
+        "Largest_Component": max(sizes, default=0),
+        "Smallest_Component": min(sizes, default=0),
+        "Singleton_Count": sizes.count(1),
+        "Summary_Message": message,
+    }
+
+    return [stats]
+
+
+# ---------------------------------------------------------------------------
+# Public handler
+# ---------------------------------------------------------------------------
+
+
+def handle_graph_connectedComponents(
+    conn: TeradataConnection,
+    container_pattern: str,
+    exclude_objects: str = "",
+    edge_repository: str = "",
+    tool_name: str | None = None,
+    *args,
+    **kwargs,
+):
+    """
+    Identify all Weakly Connected Components (WCC) in the dependency graph.
+
+    Pure-Python implementation — no stored procedure required. Issues a single
+    SQL SELECT to fetch the scoped edge set, then performs Union-Find WCC
+    partitioning entirely in the MCP server process.
+
+    A connected component is a maximal set of nodes where every node can reach
+    every other node when edge direction is ignored. This partitions the graph
+    into isolated sub-graphs.
+
+    Use this tool for:
+      - Understanding graph structure and partitioning
+      - Identifying isolated sub-graphs
+      - Scoping downstream impact analysis to a single component
+      - Pre-filtering before cycle detection (cycles exist only within a component)
+      - Identifying "islands" of related objects for migration or refactoring
+      - Estimating blast radius
+
+    Arguments:
+      container_pattern - str: CSV LIKE patterns matched against Src_Container_Name.
+                          Supports wildcards (%) and CSV format.
+                          Examples: '%WBC%', '%WBC%,%StGeo%', 'DEV01_%,DEV02_%'
+
+                          CRITICAL: STRING type, not array.
+                          CORRECT: container_pattern="%WBC%,%StGeo%"
+                          WRONG: container_pattern=["%WBC%", "%StGeo%"]
+
+      exclude_objects   - str: CSV LIKE patterns to exclude.
+                          Matches against container name (or DB.Object if
+                          the pattern contains a dot).
+                          Default: '' (no exclusions)
+
+      edge_repository   - str: Edge repository view/table conforming to the
+                          Graph Edge Contract (Src_Container_Name,
+                          Src_Object_Name, Src_Kind, Tgt_Container_Name,
+                          Tgt_Object_Name, Tgt_Kind columns).
+                          For AI-Native Data Products use:
+                          '{ProductName}_Semantic.lineage_graph'
+                          Call graph_edgeContractDDL to generate a new one.
+                          Required — no default.
+
+    Returns:
+      ResponseType: formatted response with connected component results.
+
+    Response structure:
+      {
+        "node_details": [...],          // One row per node with Component_Id
+        "component_summaries": [...],   // One row per component
+        "summary_stats": [...]          // Single aggregate row
+      }
+
+    node_details row fields:
+      Node_FQ, DatabaseName, ObjectName, Component_Id, Object_Kind (from Src_Kind / Tgt_Kind)
+
+    component_summaries row fields:
+      Component_Id, Node_Count, Node_List
+
+    summary_stats row fields:
+      Component_Count, Node_Count, Edge_Count,
+      Largest_Component, Smallest_Component, Singleton_Count, Summary_Message
+    """
+    logger.debug(
+        "Tool: handle_graph_connectedComponents: Args: container_pattern=%s, exclude_objects=%s, edge_repository=%s",
+        container_pattern,
+        exclude_objects,
+        edge_repository,
+    )
+
+    # -----------------------------------------------------------------------
+    # Parse and validate inputs
+    # -----------------------------------------------------------------------
+    container_patterns = parse_csv_patterns(container_pattern)
+    if not container_patterns:
+        return create_response(
+            {"error": "container_pattern must not be empty"},
+            {
+                "tool_name": tool_name or "graph_connectedComponents",
+                "container_pattern": container_pattern,
+                "status": "error",
+            },
+        )
+
+    if not edge_repository:
+        return create_response(
+            {
+                "error": (
+                    "edge_repository is required. "
+                    "For AI-Native Data Products use '{ProductName}_Semantic.lineage_graph'. "
+                    "Call graph_edgeContractDDL to generate a new edge repository."
+                )
+            },
+            {
+                "tool_name": tool_name or "graph_connectedComponents",
+                "container_pattern": container_pattern,
+                "status": "error",
+            },
+        )
+
+    excl_pattern_list = parse_csv_patterns(exclude_objects)
+
+    try:
+        with conn.cursor() as cur:
+            # -------------------------------------------------------------------
+            # Step 1 — Fetch all scoped edges in one SQL SELECT
+            # -------------------------------------------------------------------
+            container_where = build_like_or(container_patterns, "Src_Container_Name")
+            excl_where = _build_excl_clauses(excl_pattern_list)
+
+            edge_sql = f"""
+LOCKING ROW FOR ACCESS
+SELECT
+    TRIM(Src_Container_Name) || '.' || TRIM(Src_Object_Name) AS Src_FQ
+    ,TRIM(Tgt_Container_Name) || '.' || TRIM(Tgt_Object_Name) AS Tgt_FQ
+    ,COALESCE(TRIM(Src_Kind), 'Unknown') AS Src_Kind
+    ,COALESCE(TRIM(Tgt_Kind), 'Unknown') AS Tgt_Kind
+FROM {edge_repository}
+WHERE {container_where}
+    {excl_where}
+"""
+            logger.debug("Tool: handle_graph_connectedComponents: Fetching edges:\n%s", edge_sql)
+
+            cur.execute(edge_sql)
+            raw_edges = cur.fetchall()
+
+            # -------------------------------------------------------------------
+            # Step 2 — Build Union-Find and collect node kinds
+            # -------------------------------------------------------------------
+            uf = _UnionFind()
+            node_kind: dict[str, str] = {}  # {node_fq: object_kind}
+
+            for src_fq, tgt_fq, src_kind, tgt_kind in raw_edges:
+                uf.union(src_fq, tgt_fq)
+                # First-seen kind wins; Tgt_Kind covers nodes that never appear as a source
+                node_kind.setdefault(src_fq, src_kind or "Unknown")
+                node_kind.setdefault(tgt_fq, tgt_kind or "Unknown")
+
+            edge_count = len(raw_edges)
+            logger.debug("Tool: handle_graph_connectedComponents: Loaded %d edges", edge_count)
+
+            # -------------------------------------------------------------------
+            # Step 3 — Assign integer component IDs
+            # -------------------------------------------------------------------
+            comp_map = uf.component_map()
+            unique_roots = sorted(set(comp_map.values()))
+            root_to_id = {r: i + 1 for i, r in enumerate(unique_roots)}
+
+            logger.debug("Tool: handle_graph_connectedComponents: %d component(s) identified", len(unique_roots))
+
+            # -------------------------------------------------------------------
+            # Step 4 — Build response structures
+            # -------------------------------------------------------------------
+            node_details = _build_node_details(comp_map, root_to_id, node_kind)
+            component_summaries = _build_component_summaries(comp_map, root_to_id)
+            summary_stats = _build_summary_stats(component_summaries, edge_count)
+
+            response_data = {
+                "node_details": node_details,
+                "component_summaries": component_summaries,
+                "summary_stats": summary_stats,
+            }
+
+            metadata = {
+                "tool_name": tool_name or "graph_connectedComponents",
+                "container_pattern": container_pattern,
+                "exclude_objects": exclude_objects,
+                "edge_repository": edge_repository,
+                "result_set_counts": {
+                    "node_details": len(node_details),
+                    "component_summaries": len(component_summaries),
+                    "summary_stats": len(summary_stats),
+                },
+                "status": "success",
+                "message": summary_stats[0]["Summary_Message"],
+            }
+
+            logger.debug("Tool: handle_graph_connectedComponents: metadata: %s", metadata)
+            return create_response(response_data, metadata)
+
+    except Exception as e:
+        logger.error("Tool: handle_graph_connectedComponents: Error: %s", e, exc_info=True)
+        return create_response(
+            {"error": str(e)},
+            {
+                "tool_name": tool_name or "graph_connectedComponents",
+                "container_pattern": container_pattern,
+                "status": "error",
+            },
+        )
+
+
+# ---------------------------------------------------------------------------
+# Tool registration descriptor ('{ProductName}' in the text is a literal placeholder)
+# ---------------------------------------------------------------------------
+GRAPH_CONNECTED_COMPONENTS_TOOL = {
+ "name": "graph_connectedComponents",
+ "handler": handle_graph_connectedComponents,
+ "description": (
+ "Identify all Weakly Connected Components (WCC) in the dependency graph. "
+ "Pure-Python implementation — no stored procedure required. "
+ "A connected component is a maximal set of nodes reachable from one another "
+ "when edge direction is ignored. Fetches the scoped edge set in one SQL SELECT, "
+ "then performs Union-Find WCC partitioning in the MCP server process. "
+ "Returns node-to-component mapping, per-component summaries, and overall "
+ "statistics. Use to understand graph structure, identify isolated sub-graphs, "
+ "scope impact analysis, or pre-filter before cycle detection. "
+ "Requires an edge repository conforming to the Graph Edge Contract. "
+ "For AI-Native Data Products use '{ProductName}_Semantic.lineage_graph'. "
+ "Call graph_edgeContractDDL to generate a new edge repository."
+ ),
+ "parameters": {
+ "container_pattern": {
+ "type": "string",
+ "description": (
+ "CSV LIKE patterns for containers (databases/schemas) to scan. "
+ "Supports wildcards: 'DFJ%' or '%WBC%,%StGeo%' for multiple."
+ ),
+ "required": True,
+ },
+ "exclude_objects": {
+ "type": "string",
+ "description": (
+ "CSV LIKE patterns to exclude from the scan. "
+ "Matches against container name (or DB.Object if pattern contains a dot). "
+ "Example: 'DFJ%,C_D02%'. Default: '' (no exclusions)."
+ ),
+ "default": "",
+ },
+ "edge_repository": {
+ "type": "string",
+ "description": (
+ "Edge repository table or view conforming to the Graph Edge Contract. "
+ "For AI-Native Data Products use '{ProductName}_Semantic.lineage_graph'. "
+ "Call graph_edgeContractDDL to generate one if needed. "
+ "Required — no default."
+ ),
+ "required": True,
+ },
+ },
+}
diff --git a/src/teradata_mcp_server/tools/graph/graph_detect_cycles.py b/src/teradata_mcp_server/tools/graph/graph_detect_cycles.py
new file mode 100644
index 0000000..5afd73b
--- /dev/null
+++ b/src/teradata_mcp_server/tools/graph/graph_detect_cycles.py
@@ -0,0 +1,555 @@
+"""
+graph_detect_cycles.py — Cycle detection tool.
+
+Provides handle_graph_detectCycles and GRAPH_DETECT_CYCLES_TOOL.
+
+Pure-Python implementation — no stored procedure required.
+
+Algorithm overview:
+ 1. Fetch all edges within the container scope in a single SQL SELECT.
+ 2. Perform Union-Find (WCC partitioning) to identify connected components.
+ 3. Run iterative DFS (grey/black colouring) independently within each
+ component. Iterative DFS avoids Python's recursion limit on deep graphs.
+ 4. Collect and deduplicate all directed cycles found.
+ 5. Assemble the same three-structure response the SP returned:
+ cycle_details — one row per node per cycle
+ cycle_summaries — one row per cycle with human-readable path
+ summary_stats — single aggregate row
+
+Edge direction convention (matches Edge Repository / graph_bfsLevels):
+ Src_Object_Name is REFERENCED BY Tgt_Object_Name.
+ => Src is the DEPENDENCY (upstream of Tgt).
+ => Tgt is the DEPENDENT (downstream of Src).
+ The directed edge for cycle detection runs Src → Tgt:
+ a view (Tgt) DEPENDS ON a table (Src), so the edge Src→Tgt represents
+ "Src must exist before Tgt". A cycle in this direction is a genuine
+ circular dependency.
+
+Author: Paul Dancer — Teradata Global Field Tech
+"""
+
+import logging
+from collections import defaultdict
+from collections.abc import Iterator
+
+from teradatasql import TeradataConnection
+
+from teradata_mcp_server.tools.graph._graph_utils import (
+ build_like_or,
+ parse_csv_patterns,
+)
+from teradata_mcp_server.tools.utils import create_response
+
+logger = logging.getLogger("teradata_mcp_server")
+
+
+# ---------------------------------------------------------------------------
+# Internal helpers
+# ---------------------------------------------------------------------------
+# parse_csv_patterns and build_like_or are imported from _graph_utils.
+
+
+def _build_excl_clauses(patterns: list[str]) -> str:
+    """
+    Build a NOT (...) exclusion fragment for Src_Container_Name LIKE patterns.
+
+    A pattern containing a dot is split on the first dot into a container
+    pattern and an object pattern; a plain pattern matches the container only.
+    Only the Src side is tested. Single quotes are doubled so a pattern cannot
+    terminate the SQL string literal it is embedded in.
+
+    Arguments:
+      patterns - List of exclusion LIKE patterns
+
+    Returns:
+      SQL fragment beginning with "AND NOT (...)" or empty string
+    """
+    if not patterns:
+        return ""
+    conditions = []
+    for p in (raw.replace("'", "''") for raw in patterns):
+        if "." in p:
+            db_part, obj_part = p.split(".", 1)
+            conditions.append(f"(Src_Container_Name LIKE '{db_part}' AND Src_Object_Name LIKE '{obj_part}')")
+        else:
+            conditions.append(f"Src_Container_Name LIKE '{p}'")
+    return "AND NOT (" + " OR ".join(conditions) + ")"
+
+
+# ---------------------------------------------------------------------------
+# Union-Find for WCC partitioning
+# ---------------------------------------------------------------------------
+
+
+class _UnionFind:
+    """
+    Union-Find with path compression, used to split the edge set into Weakly
+    Connected Components before the per-component DFS. find() is iterative
+    because unions along a long dependency chain can build an equally long parent chain.
+    """
+
+    def __init__(self):
+        self._parent: dict[str, str] = {}
+
+    def find(self, x: str) -> str:
+        """Return canonical representative of x's component (iterative, with path compression)."""
+        root = self._parent.setdefault(x, x)
+        while self._parent[root] != root:  # walk up to the self-parented root
+            root = self._parent[root]
+        while self._parent[x] != root:  # re-point every walked node at the root
+            self._parent[x], x = root, self._parent[x]
+        return root
+
+    def union(self, a: str, b: str) -> None:
+        """Merge the components containing a and b."""
+        ra, rb = self.find(a), self.find(b)
+        if ra != rb:
+            self._parent[ra] = rb
+
+    def component_map(self) -> dict[str, str]:
+        """Return {node: component_root} for all known nodes."""
+        return {n: self.find(n) for n in self._parent}
+
+
+# ---------------------------------------------------------------------------
+# Iterative DFS cycle detection
+# ---------------------------------------------------------------------------
+
+
+def _detect_cycles_in_subgraph(nodes: set, adj: dict[str, list[str]]) -> list[list[str]]:
+    """
+    Report directed cycles in an adjacency sub-graph, one per DFS back-edge.
+
+    Iterative DFS with grey/black colouring: a node is GREY while on the
+    current DFS stack and BLACK once fully explored; an edge into a GREY node
+    closes a cycle, which is read off the stack path. Iteration (not
+    recursion) keeps deep dependency chains clear of Python's recursion limit.
+    This is not an enumeration of every simple cycle — a back-edge that lies
+    on several simple cycles yields only one of them.
+
+    Arguments:
+      nodes - Set of node FQ names in this component
+      adj   - Adjacency list {src: [tgt, ...]}; may cover the full graph, only
+              nodes reachable from `nodes` are visited
+
+    Returns:
+      List of cycles; each cycle is a list of FQ node names (start == end).
+    """
+    grey, black = 1, 2  # a node absent from `colour` is unvisited
+    colour: dict[str, int] = {}
+    cycles: list[list[str]] = []
+
+    # Sorted start order makes cycle numbering reproducible across runs
+    for start in sorted(nodes):
+        if colour.get(start) == black:
+            continue
+
+        # One shared path for the whole walk (no per-push copy); path[i] owns stack[i]
+        path: list[str] = [start]
+        depth_of: dict[str, int] = {start: 0}
+        stack: list[Iterator[str]] = [iter(adj.get(start, []))]
+        colour[start] = grey
+
+        while stack:
+            try:
+                nxt = next(stack[-1])
+            except StopIteration:
+                # Node fully explored: retire it from the stack and the path
+                stack.pop()
+                done = path.pop()
+                del depth_of[done]
+                colour[done] = black
+                continue
+
+            if colour.get(nxt) == grey:
+                # Back-edge found — the cycle is the path tail starting at nxt
+                cycles.append(path[depth_of[nxt] :] + [nxt])
+            elif colour.get(nxt) != black:
+                colour[nxt] = grey
+                depth_of[nxt] = len(path)
+                path.append(nxt)
+                stack.append(iter(adj.get(nxt, [])))
+
+    return cycles
+
+
+# ---------------------------------------------------------------------------
+# Response assembly helpers
+# ---------------------------------------------------------------------------
+
+
+def _build_cycle_details(cycles: list[list[str]], component_id_map: dict[str, int]) -> list[dict]:
+    """
+    Produce the cycle_details result set: one dict per member node of each cycle.
+
+    Cycle_Id numbers cycles from 1 in input order and Cycle_Pos numbers a
+    cycle's members from 1; the closing repeat of the first node is not a member.
+
+    Arguments:
+      cycles           - List of cycle paths (each a list of FQ names, start==end)
+      component_id_map - {node_fq: component_id} lookup; misses report -1
+
+    Returns:
+      List of row dicts (modelled on the SP's cur_NodeDetails schema)
+    """
+    # Each path's final element repeats its first node, so it is sliced off
+    return [
+        {
+            "Cycle_Id": cycle_no,
+            "Cycle_Pos": position,
+            "Node_FQ": member,
+            "Cycle_Length": len(closed_path) - 1,
+            "Component_Id": component_id_map.get(member, -1),
+            "Strategy": "DFS",
+        }
+        for cycle_no, closed_path in enumerate(cycles, start=1)
+        for position, member in enumerate(closed_path[:-1], start=1)
+    ]
+
+
+def _build_cycle_summaries(cycles: list[list[str]], component_id_map: dict[str, int]) -> list[dict]:
+    """
+    Produce the cycle_summaries result set: one dict per cycle.
+
+    Component_Id is looked up from the cycle's first node; misses report -1.
+
+    Arguments:
+      cycles           - List of cycle paths
+      component_id_map - {node_fq: component_id} lookup
+
+    Returns:
+      List of row dicts (modelled on the SP's cur_CompSummaries schema)
+    """
+
+    def _summarise(cycle_no: int, closed_path: list[str]) -> dict:
+        members = closed_path[:-1]  # closing repeat of the first node excluded
+        return {
+            "Cycle_Id": cycle_no,
+            "Cycle_Length": len(members),
+            "Component_Id": component_id_map.get(members[0], -1),
+            "Strategy": "DFS",
+            "Cycle_Path": " -> ".join(closed_path),
+        }
+
+    return [_summarise(cycle_no, closed_path) for cycle_no, closed_path in enumerate(cycles, start=1)]
+
+
+def _build_summary_stats(cycles: list[list[str]], edge_count: int, component_count: int) -> list[dict]:
+    """
+    Produce the summary_stats result set: a single aggregate dict.
+
+    Components_With_Cycles is approximated by the number of distinct cycle
+    start nodes; no component lookup is available here.
+
+    Arguments:
+      cycles          - List of detected cycles
+      edge_count      - Total edges loaded from the repository
+      component_count - Number of WCC components identified
+
+    Returns:
+      Single-element list (modelled on the SP's cur_SummaryStats schema)
+    """
+    cycle_total = len(cycles)
+    if cycle_total > 1:
+        message = f"{cycle_total} cycles detected."
+    elif cycle_total == 1:
+        message = "1 cycle detected."
+    else:
+        message = "No cycles detected — graph is a DAG."
+
+    stats = {
+        "Cycle_Count": cycle_total,
+        "Total_Nodes_In_Cycles": sum(len(path) - 1 for path in cycles),  # closing repeat excluded
+        "Components_With_Cycles": len({path[0] for path in cycles}),
+        "Edge_Count": edge_count,
+        "Components_Scanned": component_count,
+        "Strategy_Used": "DFS",
+        "Summary_Message": message,
+    }
+    return [stats]
+
+
+# ---------------------------------------------------------------------------
+# Public handler
+# ---------------------------------------------------------------------------
+
+
+def handle_graph_detectCycles(
+    conn: TeradataConnection,
+    container_pattern: str,
+    exclude_objects: str = "",
+    edge_repository: str = "",
+    tool_name: str | None = None,
+    *args,
+    **kwargs,
+):
+    """
+    Detect circular dependencies (cycles) in the dependency graph.
+
+    Pure-Python implementation — no stored procedure required. Issues a single
+    SQL SELECT to fetch the scoped edge set, then performs WCC partitioning
+    followed by iterative DFS cycle detection entirely in the MCP server process.
+
+    Use this tool for:
+      - Validating graph integrity (DAG property)
+      - Finding objects that form circular references
+      - Identifying stub-then-replace code patterns
+      - Debugging topological sort hangs
+      - Pre-deployment cycle checks
+
+    Arguments:
+      container_pattern - str: CSV LIKE patterns matched against Src_Container_Name.
+                          Supports wildcards (%) and CSV format.
+                          Examples:
+                            'DFJ%' — single database family
+                            '%WBC%,%StGeo%' — multiple families
+                            'DEV01_%,DEV02_%' — multiple prefixes
+
+      exclude_objects   - str: CSV LIKE patterns to exclude from the scan.
+                          Matches against container name (or DB.Object if
+                          the pattern contains a dot).
+                          Default: '' (no exclusions)
+
+      edge_repository   - str: Edge repository view/table conforming to the
+                          Graph Edge Contract (Src_Container_Name,
+                          Src_Object_Name, Src_Kind, Tgt_Container_Name,
+                          Tgt_Object_Name, Tgt_Kind columns).
+                          For AI-Native Data Products use:
+                          '{ProductName}_Semantic.lineage_graph'
+                          Call graph_edgeContractDDL to generate a new one.
+                          Required — no default.
+
+    Returns:
+      ResponseType: formatted response with cycle detection results.
+
+    Response structure:
+      {
+        "cycle_details": [...],     // One row per node per cycle
+        "cycle_summaries": [...],   // One row per cycle with path string
+        "summary_stats": [...]      // Single aggregate row
+      }
+
+    cycle_details row fields:
+      Cycle_Id, Cycle_Pos, Node_FQ, Cycle_Length, Component_Id, Strategy
+
+    cycle_summaries row fields:
+      Cycle_Id, Cycle_Length, Component_Id, Strategy, Cycle_Path
+
+    summary_stats row fields:
+      Cycle_Count, Total_Nodes_In_Cycles, Components_With_Cycles,
+      Edge_Count, Components_Scanned, Strategy_Used, Summary_Message
+    """
+    logger.debug(
+        "Tool: handle_graph_detectCycles: Args: container_pattern=%s, exclude_objects=%s, edge_repository=%s",
+        container_pattern,
+        exclude_objects,
+        edge_repository,
+    )
+
+    # -----------------------------------------------------------------------
+    # Parse and validate inputs
+    # -----------------------------------------------------------------------
+    container_patterns = parse_csv_patterns(container_pattern)
+    if not container_patterns:
+        return create_response(
+            {"error": "container_pattern must not be empty"},
+            {
+                "tool_name": tool_name or "graph_detectCycles",
+                "container_pattern": container_pattern,
+                "status": "error",
+            },
+        )
+
+    if not edge_repository:
+        return create_response(
+            {
+                "error": (
+                    "edge_repository is required. "
+                    "For AI-Native Data Products use '{ProductName}_Semantic.lineage_graph'. "
+                    "Call graph_edgeContractDDL to generate a new edge repository."
+                )
+            },
+            {
+                "tool_name": tool_name or "graph_detectCycles",
+                "container_pattern": container_pattern,
+                "status": "error",
+            },
+        )
+
+    excl_pattern_list = parse_csv_patterns(exclude_objects)
+
+    try:
+        with conn.cursor() as cur:
+            # -------------------------------------------------------------------
+            # Step 1 — Fetch all scoped edges in one SQL SELECT
+            # -------------------------------------------------------------------
+            container_where = build_like_or(container_patterns, "Src_Container_Name")
+            excl_where = _build_excl_clauses(excl_pattern_list)
+
+            edge_sql = f"""
+LOCKING ROW FOR ACCESS
+SELECT
+    TRIM(Src_Container_Name) || '.' || TRIM(Src_Object_Name) AS Src_FQ
+    ,TRIM(Tgt_Container_Name) || '.' || TRIM(Tgt_Object_Name) AS Tgt_FQ
+FROM {edge_repository}
+WHERE {container_where}
+    {excl_where}
+"""
+            logger.debug("Tool: handle_graph_detectCycles: Fetching edges:\n%s", edge_sql)
+
+            cur.execute(edge_sql)
+            raw_edges = cur.fetchall()
+
+            # -------------------------------------------------------------------
+            # Step 2 — Build adjacency list and WCC components
+            # -------------------------------------------------------------------
+            # adj[src] = [tgt, ...] — directed: src → tgt means tgt DEPENDS ON src
+            adj: dict[str, list[str]] = defaultdict(list)
+            uf = _UnionFind()
+
+            for src_fq, tgt_fq in raw_edges:
+                adj[src_fq].append(tgt_fq)
+                uf.union(src_fq, tgt_fq)
+
+            edge_count = len(raw_edges)
+            logger.debug("Tool: handle_graph_detectCycles: Loaded %d edges", edge_count)
+
+            if edge_count == 0:
+                # No edges in scope — no cycles possible
+                return create_response(
+                    {
+                        "cycle_details": [],
+                        "cycle_summaries": [],
+                        "summary_stats": _build_summary_stats([], 0, 0),
+                    },
+                    {
+                        "tool_name": tool_name or "graph_detectCycles",
+                        "container_pattern": container_pattern,
+                        "exclude_objects": exclude_objects,
+                        "edge_repository": edge_repository,
+                        "result_set_counts": {
+                            "cycle_details": 0,
+                            "cycle_summaries": 0,
+                            "summary_stats": 1,
+                        },
+                        "status": "success",
+                        "message": "No edges found in scope — no cycles possible.",
+                    },
+                )
+
+            # Assign integer component IDs from the sorted Union-Find roots (stable across runs)
+            comp_map = uf.component_map()
+            unique_roots = sorted(set(comp_map.values()))
+            root_to_id = {r: i + 1 for i, r in enumerate(unique_roots)}
+            component_id_map: dict[str, int] = {n: root_to_id[r] for n, r in comp_map.items()}
+
+            # Group nodes by component
+            components: dict[str, set[str]] = defaultdict(set)
+            for node, comp_root in comp_map.items():
+                components[comp_root].add(node)
+            component_count = len(components)
+            logger.debug("Tool: handle_graph_detectCycles: %d components identified", component_count)
+
+            # -------------------------------------------------------------------
+            # Step 3 — Run iterative DFS within each component
+            # -------------------------------------------------------------------
+            all_cycles: list[list[str]] = []
+            # Visit components in Component_Id order so cycle numbering follows it
+            for comp_root in unique_roots:
+                all_cycles.extend(_detect_cycles_in_subgraph(components[comp_root], adj))
+
+            logger.debug("Tool: handle_graph_detectCycles: %d cycle(s) detected", len(all_cycles))
+
+            # -------------------------------------------------------------------
+            # Step 4 — Assemble response structures
+            # -------------------------------------------------------------------
+            cycle_details = _build_cycle_details(all_cycles, component_id_map)
+            cycle_summaries = _build_cycle_summaries(all_cycles, component_id_map)
+            summary_stats = _build_summary_stats(all_cycles, edge_count, component_count)
+            # The helper only sees cycle start nodes; report distinct components instead
+            summary_stats[0]["Components_With_Cycles"] = len({component_id_map[c[0]] for c in all_cycles})
+
+            response_data = {
+                "cycle_details": cycle_details,
+                "cycle_summaries": cycle_summaries,
+                "summary_stats": summary_stats,
+            }
+
+            metadata = {
+                "tool_name": tool_name or "graph_detectCycles",
+                "container_pattern": container_pattern,
+                "exclude_objects": exclude_objects,
+                "edge_repository": edge_repository,
+                "result_set_counts": {
+                    "cycle_details": len(cycle_details),
+                    "cycle_summaries": len(cycle_summaries),
+                    "summary_stats": len(summary_stats),
+                },
+                "status": "success",
+                "message": summary_stats[0]["Summary_Message"],
+            }
+
+            logger.debug("Tool: handle_graph_detectCycles: metadata: %s", metadata)
+            return create_response(response_data, metadata)
+
+    except Exception as e:
+        logger.error("Tool: handle_graph_detectCycles: Error: %s", e, exc_info=True)
+        return create_response(
+            {"error": str(e)},
+            {
+                "tool_name": tool_name or "graph_detectCycles",
+                "container_pattern": container_pattern,
+                "status": "error",
+            },
+        )
+
+
+# ---------------------------------------------------------------------------
+# Tool registration descriptor ('{ProductName}' in the text is a literal placeholder)
+# ---------------------------------------------------------------------------
+GRAPH_DETECT_CYCLES_TOOL = {
+ "name": "graph_detectCycles",
+ "handler": handle_graph_detectCycles,
+ "description": (
+ "Detect circular references (cycles) in the dependency graph. "
+ "Pure-Python implementation — no stored procedure required. "
+ "Fetches the scoped edge set in one SQL SELECT, partitions into Weakly "
+ "Connected Components via Union-Find, then runs iterative DFS cycle "
+ "detection within each component. "
+ "Returns each cycle as an ordered list of nodes with a human-readable "
+ "path string. Use to validate graph integrity, find stub-then-replace "
+ "patterns, or identify objects that will cause topological sort to hang. "
+ "Requires an edge repository conforming to the Graph Edge Contract. "
+ "For AI-Native Data Products use '{ProductName}_Semantic.lineage_graph'. "
+ "Call graph_edgeContractDDL to generate a new edge repository."
+ ),
+ "parameters": {
+ "container_pattern": {
+ "type": "string",
+ "description": (
+ "CSV LIKE patterns for containers (databases/schemas) to scan. "
+ "Supports wildcards: 'DFJ%' or '%WBC%,%StGeo%' for multiple."
+ ),
+ "required": True,
+ },
+ "exclude_objects": {
+ "type": "string",
+ "description": (
+ "CSV LIKE patterns to exclude from the scan. "
+ "Matches against container name (or DB.Object if pattern contains a dot). "
+ "Example: 'DFJ%,C_D02%'. Default: '' (no exclusions)."
+ ),
+ "default": "",
+ },
+ "edge_repository": {
+ "type": "string",
+ "description": (
+ "Edge repository table or view conforming to the Graph Edge Contract. "
+ "For AI-Native Data Products use '{ProductName}_Semantic.lineage_graph'. "
+ "Call graph_edgeContractDDL to generate one if needed. "
+ "Required — no default."
+ ),
+ "required": True,
+ },
+ },
+}
diff --git a/src/teradata_mcp_server/tools/graph/graph_edge_contract.py b/src/teradata_mcp_server/tools/graph/graph_edge_contract.py
new file mode 100644
index 0000000..7af3128
--- /dev/null
+++ b/src/teradata_mcp_server/tools/graph/graph_edge_contract.py
@@ -0,0 +1,622 @@
+# ------------------------------------------------------------------------------- #
+# File: graph_edge_contract.py #
+# #
+# Description: #
+# Graph Edge Contract — schema abstraction for the graph analysis tools. #
+# #
+# Provides: #
+# 1. GRAPH_EDGE_CONTRACT constant — canonical contract text, served as an #
+# MCP Resource via app.py registration. #
+# 2. handle_graph_edgeContractDDL() — MCP Tool that generates ready-to-run #
+# Teradata DDL for a contract-conforming edge table or view. #
+# #
+# The graph analysis tools (findRootObjects, traceLineage, #
+# connectedComponents, detectCycles, bfsLevels, analyseDatabase) all #
+# require an edge repository — a table or view conforming to this contract. #
+# Users supply its fully-qualified name via the edge_repository parameter. #
+# #
+# Column names are deliberately platform-agnostic: #
+# Src_Container_Name / Tgt_Container_Name (not DatabaseName) #
+# Src_Object_Name / Tgt_Object_Name (not ObjectName) #
+# Src_Kind / Tgt_Kind (not Object_Kind) #
+# #
+# Optional enrichment columns (present in lineage_graph; ignored by tools #
+# that don't use them — safe to omit from custom edge repositories): #
+# Edge_Relationship — nature of the edge (e.g. ETL_INPUT, ETL_OUTPUT) #
+# Transformation_Type — process type (e.g. ETL, FEATURE_ENG, AGGREGATION) #
+# #
+# "Container" generalises across platforms: a Teradata database, a script #
+# directory, an Informatica workflow folder, a dbt project, etc. #
+# #
+# AI-Native Data Product shortcut: #
+# {ProductName}_Semantic.lineage_graph (Observability Module v1.5) already #
+# conforms to this contract and can be used directly as edge_repository. #
+# #
+# Contract Version: 1.1 #
+# ------------------------------------------------------------------------------- #
+
+import logging
+from typing import Any
+
+logger = logging.getLogger("teradata_mcp_server")
+
+
+# ──────────────────────────────────────────────────────────────────────────────── #
+# GRAPH EDGE CONTRACT — Canonical Text #
+# #
+# Registered as an MCP Resource in app.py (URI: graph://edge-contract). #
+# AI agents retrieve this to understand the edge_repository schema required #
+# by all graph_* tools. #
+# ──────────────────────────────────────────────────────────────────────────────── #
+
+GRAPH_EDGE_CONTRACT = """
+Graph Edge Contract — Teradata MCP Server (Community Edition)
+=============================================================
+
+Version: 1.1
+Status: Stable
+Applies: All graph_* tools in the Teradata MCP Server
+
+
+PURPOSE
+-------
+The graph analysis tools operate on a directed dependency graph stored as an
+edge list. The edge repository is any Teradata table or view that conforms to
+this contract. Users supply its fully-qualified name via the edge_repository
+parameter on each graph tool.
+
+
+REQUIRED COLUMNS
+----------------
+ Column Name Type Nullable Description
+ ────────────────── ────────────── ──────── ──────────────────────────────────
+ Src_Container_Name VARCHAR(128) No Container of the source (upstream)
+ object. Platform-agnostic: a
+ Teradata database, a script
+ directory, an ETL workflow folder,
+ a dbt project, etc.
+
+ Src_Object_Name VARCHAR(128) No Name of the source object.
+
+ Src_Kind VARCHAR(30) No Object type of the source.
+ Recommended: T=Table, V=View,
+ P=Procedure, M=Macro, J=JoinIndex,
+ H=HashIndex, G=Trigger,
+ A=AggregateUDF, F=UDF, S=Script,
+ E=ETL Mapping.
+ Custom values permitted.
+
+ Tgt_Container_Name VARCHAR(128) No Container of the target
+ (downstream) object. Same
+ semantics as Src_Container_Name.
+
+ Tgt_Object_Name VARCHAR(128) No Name of the target object.
+
+ Tgt_Kind VARCHAR(30) No Object type of the target.
+ Same value domain as Src_Kind.
+
+
+EDGE SEMANTICS
+--------------
+Each row represents one directed dependency edge:
+
+ Source (Src) ──is referenced by──▶ Target (Tgt)
+
+The TARGET object depends on the SOURCE object.
+ - SOURCE is upstream: a prerequisite, a referenced table or script.
+ - TARGET is downstream: a consumer, a dependent view or mapping.
+
+Example:
+ Src_Container_Name='PROD_STD_T' Src_Object_Name='CUSTOMER' Src_Kind='Table'
+ Tgt_Container_Name='PROD_STD_V' Tgt_Object_Name='CUST_ACTIVE' Tgt_Kind='View'
+ Edge_Relationship='DIRECT' Transformation_Type='ETL'
+
+ Meaning: View PROD_STD_V.CUST_ACTIVE depends on table PROD_STD_T.CUSTOMER
+ via an ETL transformation.
+
+
+OPTIONAL COLUMNS
+----------------
+The following columns are recognised by the contract but not required by the
+graph analysis tools. They are ignored by tools that do not use them, so
+omitting them from a custom edge repository does not break conformance.
+
+ Column Name Type Nullable Description
+ ───────────────────── ───────────── ──────── ──────────────────────────────────
+ Edge_Relationship VARCHAR(50) Yes Nature of the dependency edge.
+ Recommended values:
+ DIRECT — object-to-object
+ dependency
+ ETL_INPUT — source table to
+ ETL job
+ ETL_OUTPUT — ETL job to target
+ table
+ JOIN — join dependency
+ TRANSFORM — general
+ transformation
+ Custom values permitted.
+ Produced by lineage_graph view.
+
+ Transformation_Type VARCHAR(50) Yes Process or transformation category.
+ Recommended values:
+ ETL FEATURE_ENG
+ AGGREGATION JOIN
+ EMBEDDING_GEN FILTER
+ PIVOT
+ Custom values permitted.
+ Sourced from data_lineage table.
+
+These columns are present in the {ProductName}_Semantic.lineage_graph view
+(Observability Module v1.5) and can be used by graph visualisation tools for
+edge labelling and filtering. The graph_* analysis tools (findRootObjects,
+bfsLevels, traceLineage, detectCycles, connectedComponents, analyseDatabase)
+do not read these columns — they operate on node identity only.
+
+
+NODE IDENTITY
+-------------
+Nodes are identified by fully-qualified name: Container.Object
+
+The graph tools construct this internally as:
+ Src_Container_Name || '.' || Src_Object_Name (source node)
+ Tgt_Container_Name || '.' || Tgt_Object_Name (target node)
+
+
+WHY "CONTAINER" NOT "DATABASE"
+------------------------------
+The column names are deliberately platform-agnostic. "Container" generalises
+across platforms and technologies:
+
+ Platform Container means
+ ──────────────── ────────────────────────────────────────
+ Teradata Database name
+ Oracle Schema name
+ SQL Server Database.Schema
+ Informatica Workflow or folder path
+ Shell scripts Directory path
+ dbt Project or schema
+ Tableau/Power BI Workbook or workspace
+
+This allows a single edge repository to hold cross-platform lineage —
+e.g., a Teradata table consumed by an Informatica mapping that feeds a
+Tableau dashboard — all in one graph.
+
+
+ADDITIONAL COLUMNS
+------------------
+The edge repository may contain additional columns beyond the required and
+optional columns defined in this contract. They will be ignored by the graph
+tools.
+
+
+CONTAINER SCOPING
+-----------------
+All graph tools accept container_pattern or include_containers parameters
+that filter edges using SQL LIKE against Src_Container_Name and Tgt_Container_Name.
+The edge repository should contain edges across ALL relevant containers —
+cross-container dependencies are the primary use case for graph analysis.
+
+
+DUPLICATE EDGES
+---------------
+The graph tools tolerate duplicate edges (same Src->Tgt pair appearing more
+than once). Duplicates are deduplicated in memory during adjacency list
+construction. For performance, it is recommended that the edge repository
+contains no duplicates.
+
+
+DDL GENERATION
+--------------
+Use the graph_edgeContractDDL tool to generate a ready-to-run CREATE TABLE
+or CREATE VIEW statement for a conforming edge repository.
+""".strip()
+
+
+# ──────────────────────────────────────────────────────────────────────────────── #
+# DDL GENERATOR — Tool Handler #
+# #
+# Generates Teradata DDL for a contract-conforming edge table or view. #
+# No database connection required — pure template generation. #
+# ──────────────────────────────────────────────────────────────────────────────── #
+
+
+def handle_graph_edgeContractDDL(
+    conn: Any,
+    target_database: str,
+    object_name: str = "EdgeRepository",
+    output_type: str = "TABLE",
+    **kwargs: Any,
+) -> list[dict[str, Any]]:
+    """
+    Generate DDL for a Graph Edge Contract-conforming table or view.
+
+    Pure template generation: no SQL is executed and no database connection
+    is needed. The conn parameter is accepted only for ModuleLoader calling
+    convention compatibility and is never used.
+
+    The generated schema has 6 required columns:
+        Src_Container_Name, Src_Object_Name, Src_Kind,
+        Tgt_Container_Name, Tgt_Object_Name, Tgt_Kind
+    and 2 optional enrichment columns (ignored by the graph analysis tools,
+    useful for visualisation):
+        Edge_Relationship   — nature of the edge (ETL_INPUT, ETL_OUTPUT, DIRECT…)
+        Transformation_Type — process category (ETL, FEATURE_ENG, AGGREGATION…)
+
+    AI-Native Data Product shortcut:
+        The view {ProductName}_Semantic.lineage_graph (Observability Module
+        v1.5) already conforms to this contract, so no DDL is needed — pass
+        its fully-qualified name directly as edge_repository on any graph_*
+        tool, e.g. edge_repository='StGeoMortgage_Semantic.lineage_graph'.
+
+    Arguments:
+        conn:            TeradataConnection (unused — see above).
+        target_database: Database in which to create the edge repository.
+                         For AI-Native Data Products this is typically
+                         {ProductName}_Semantic, e.g. 'StGeoMortgage_Semantic'.
+                         Surrounding whitespace is trimmed; must not be blank.
+        object_name:     Name for the edge table/view. Default: 'EdgeRepository'.
+                         Surrounding whitespace is trimmed; must not be blank.
+        output_type:     'TABLE' or 'VIEW' (case-insensitive). Default: 'TABLE'.
+                         TABLE: CREATE TABLE DDL + separate sample DML, with
+                                all 6 required + 2 optional columns.
+                         VIEW:  CREATE VIEW template for mapping an existing
+                                lineage source to all 8 contract columns.
+        **kwargs:        Ignored; accepted for calling-convention compatibility.
+
+    Returns:
+        list[dict]: Single-element list. On success the dict contains:
+            - ddl:              DDL script (CREATE TABLE/VIEW + COMMENTs)
+            - sample_dml:       Sample INSERT statements + validation query
+                                (TABLE only; absent for VIEW)
+            - output_type:      'TABLE' or 'VIEW'
+            - contract_version: Contract version string
+        On invalid input the dict contains a single 'error' message instead.
+    """
+    logger.debug(
+        "Tool: handle_graph_edgeContractDDL: Args: target_database=%s, object_name=%s, output_type=%s",
+        target_database,
+        object_name,
+        output_type,
+    )
+
+    # ── Validate identifiers ──────────────────────────────────────────────────
+    # A blank name would yield unusable DDL such as "CREATE SET TABLE .EdgeRepository".
+    target_database = str(target_database or "").strip()
+    object_name = str(object_name or "").strip()
+    if not target_database or not object_name:
+        logger.warning("Tool: handle_graph_edgeContractDDL: Blank target_database or object_name")
+        return [{"error": "target_database and object_name must be non-empty names."}]
+
+    # ── Validate output_type ──────────────────────────────────────────────────
+    # str() keeps a non-string value (e.g. None) on the error path instead of raising.
+    output_type = str(output_type).strip().upper()
+    if output_type not in ("TABLE", "VIEW"):
+        logger.warning("Tool: handle_graph_edgeContractDDL: Invalid output_type '%s'", output_type)
+        return [{"error": f"Invalid output_type '{output_type}'. Must be 'TABLE' or 'VIEW'."}]
+
+    # ── Generate DDL (and sample DML for TABLE variant) ─────────────────────
+    if output_type == "TABLE":
+        ddl = _generate_table_ddl(target_database, object_name)
+        sample_dml = _generate_sample_dml(target_database, object_name)
+    else:
+        ddl = _generate_view_ddl(target_database, object_name)
+        sample_dml = None
+
+    logger.info(
+        "Tool: handle_graph_edgeContractDDL: Generated %s DDL for %s.%s", output_type, target_database, object_name
+    )
+
+    result: dict[str, Any] = {"ddl": ddl, "output_type": output_type, "contract_version": "1.1"}
+    if sample_dml is not None:
+        result["sample_dml"] = sample_dml
+    return [result]
+
+
+# ──────────────────────────────────────────────────────────────────────────────── #
+# Internal DDL Templates #
+# ──────────────────────────────────────────────────────────────────────────────── #
+
+
+def _generate_table_ddl(db: str, name: str) -> str:
+    """
+    Build the CREATE TABLE script for a contract-conforming edge repository.
+
+    Emits structural statements only (CREATE TABLE, COMMENT ON), per the Teradata
+    Engineering Discipline; sample DML comes from _generate_sample_dml().
+
+    Args:
+        db: Target database name.
+        name: Target table name.
+
+    Returns:
+        str: Teradata DDL script (CREATE TABLE + COMMENTs).
+    """
+    target = f"{db}.{name}"
+    return f"""-- ================================================================
+-- Graph Edge Contract — Edge Repository
+-- Generated by: Teradata MCP Server (Community Edition)
+-- Contract Version: 1.1
+-- ================================================================
+
+CREATE SET TABLE {target}
+ ,NO FALLBACK
+ ,NO BEFORE JOURNAL
+ ,NO AFTER JOURNAL
+ ,CHECKSUM = DEFAULT
+ ,DEFAULT MERGEBLOCKRATIO
+(
+ -- ── Required columns (6) ─────────────────────────────────────
+ Src_Container_Name VARCHAR(128) CHARACTER SET UNICODE NOT CASESPECIFIC NOT NULL
+ ,Src_Object_Name VARCHAR(128) CHARACTER SET UNICODE NOT CASESPECIFIC NOT NULL
+ ,Src_Kind VARCHAR(30) CHARACTER SET UNICODE NOT CASESPECIFIC NOT NULL
+ COMPRESS ('T','V','P','M','J','H','G','A','F','S','E','R',
+ 'Table','View','Procedure','Macro','Job','Script')
+ ,Tgt_Container_Name VARCHAR(128) CHARACTER SET UNICODE NOT CASESPECIFIC NOT NULL
+ ,Tgt_Object_Name VARCHAR(128) CHARACTER SET UNICODE NOT CASESPECIFIC NOT NULL
+ ,Tgt_Kind VARCHAR(30) CHARACTER SET UNICODE NOT CASESPECIFIC NOT NULL
+ COMPRESS ('T','V','P','M','J','H','G','A','F','S','E','R',
+ 'Table','View','Procedure','Macro','Job','Script')
+ -- ── Optional enrichment columns (2) ──────────────────────────
+ -- Ignored by graph analysis tools; used by visualisation clients.
+ ,Edge_Relationship VARCHAR(50) CHARACTER SET UNICODE NOT CASESPECIFIC
+ COMPRESS ('DIRECT','ETL_INPUT','ETL_OUTPUT',
+ 'JOIN','TRANSFORM','FILTER')
+ ,Transformation_Type VARCHAR(50) CHARACTER SET UNICODE NOT CASESPECIFIC
+ COMPRESS ('ETL','FEATURE_ENG','AGGREGATION','JOIN',
+ 'EMBEDDING_GEN','FILTER','PIVOT')
+)
+UNIQUE PRIMARY INDEX (Src_Container_Name, Src_Object_Name, Tgt_Container_Name, Tgt_Object_Name)
+;
+
+-- ================================================================
+-- NOTE: Multi-Value Compression (MVC) on kind and optional columns
+-- ================================================================
+-- Src_Kind / Tgt_Kind COMPRESS lists cover both single-letter codes
+-- (legacy: T, V, P…) and full-word values (Table, View, Procedure…)
+-- used by the lineage_graph view. Remove unused values for optimal
+-- compression. Non-listed values store correctly but uncompressed.
+--
+-- Edge_Relationship and Transformation_Type COMPRESS lists cover the
+-- standard values from the Observability Module. Extend as needed for
+-- custom edge types in your edge repository.
+-- ================================================================
+
+COMMENT ON TABLE {target}
+ AS 'Graph Edge Contract v1.1 - edge repository for Teradata MCP Server graph tools. Each row is a directed dependency: Target depends on Source. Required: 6 columns. Optional enrichment: Edge_Relationship, Transformation_Type.'
+;
+
+COMMENT ON COLUMN {target}.Src_Container_Name
+ AS 'Source (upstream) container. Platform-agnostic: Teradata database, script directory, ETL workflow folder, etc.'
+;
+
+COMMENT ON COLUMN {target}.Src_Object_Name
+ AS 'Source (upstream) object name.'
+;
+
+COMMENT ON COLUMN {target}.Src_Kind
+ AS 'Source object type. Single-letter codes (T=Table, V=View, P=Procedure, M=Macro, J=JoinIndex, H=HashIndex, G=Trigger, S=Script, E=ETL Mapping) or full words (Table, View, Job). Custom values permitted.'
+;
+
+COMMENT ON COLUMN {target}.Tgt_Container_Name
+ AS 'Target (downstream) container. Same semantics as Src_Container_Name.'
+;
+
+COMMENT ON COLUMN {target}.Tgt_Object_Name
+ AS 'Target (downstream) object name.'
+;
+
+COMMENT ON COLUMN {target}.Tgt_Kind
+ AS 'Target object type. Same value domain as Src_Kind.'
+;
+
+COMMENT ON COLUMN {target}.Edge_Relationship
+ AS 'Optional. Nature of the dependency edge. Standard values: DIRECT (object dependency), ETL_INPUT (source to job), ETL_OUTPUT (job to target), JOIN, TRANSFORM, FILTER. Custom values permitted. Ignored by graph analysis tools.'
+;
+
+COMMENT ON COLUMN {target}.Transformation_Type
+ AS 'Optional. Process or transformation category. Standard values: ETL, FEATURE_ENG, AGGREGATION, JOIN, EMBEDDING_GEN, FILTER, PIVOT. Sourced from data_lineage.transformation_type. Ignored by graph analysis tools.'
+;"""
+
+
+def _generate_sample_dml(db: str, name: str) -> str:
+    """
+    Build sample INSERT statements plus a validation query for an edge table.
+
+    Kept apart from the DDL because the Teradata Engineering Discipline
+    forbids INSERT/SELECT statements in DDL files (.tbl).
+
+    Args:
+        db: Target database name.
+        name: Target table name.
+
+    Returns:
+        str: Sample DML script (INSERTs + validation SELECT).
+    """
+    target = f"{db}.{name}"
+    return f"""-- ================================================================
+-- Sample data — two edges forming a simple dependency chain:
+-- CUSTOMER (table) <- CUSTOMER_ACTIVE (view) <- CUSTOMER_REPORT (view)
+-- Optional columns omitted — they are not required for conformance.
+-- ================================================================
+
+INSERT INTO {target}
+( Src_Container_Name, Src_Object_Name, Src_Kind
+ ,Tgt_Container_Name, Tgt_Object_Name, Tgt_Kind)
+VALUES
+( 'MY_DB_STD_T', 'CUSTOMER', 'Table'
+ ,'MY_DB_STD_V', 'CUSTOMER_ACTIVE', 'View')
+;
+
+INSERT INTO {target}
+( Src_Container_Name, Src_Object_Name, Src_Kind
+ ,Tgt_Container_Name, Tgt_Object_Name, Tgt_Kind)
+VALUES
+( 'MY_DB_STD_V', 'CUSTOMER_ACTIVE', 'View'
+ ,'MY_DB_STD_V', 'CUSTOMER_REPORT', 'View')
+;
+
+-- ================================================================
+-- Cross-platform example with optional enrichment columns populated.
+-- An ETL job is surfaced as a first-class node (matching lineage_graph):
+-- CUSTOMER (table) -> ETL_LOAD (job) -> CUSTOMER_FEATURES (table)
+-- ================================================================
+
+INSERT INTO {target}
+( Src_Container_Name, Src_Object_Name, Src_Kind
+ ,Tgt_Container_Name, Tgt_Object_Name, Tgt_Kind
+ ,Edge_Relationship, Transformation_Type)
+VALUES
+( 'MY_DB_STD_T', 'CUSTOMER', 'Table'
+ ,'', 'ETL_LOAD', 'Job'
+ ,'ETL_INPUT', 'ETL')
+;
+
+INSERT INTO {target}
+( Src_Container_Name, Src_Object_Name, Src_Kind
+ ,Tgt_Container_Name, Tgt_Object_Name, Tgt_Kind
+ ,Edge_Relationship, Transformation_Type)
+VALUES
+( '', 'ETL_LOAD', 'Job'
+ ,'MY_PRED_STD_T', 'CUSTOMER_FEATURES', 'Table'
+ ,'ETL_OUTPUT', 'FEATURE_ENG')
+;
+
+-- ================================================================
+-- Validation — confirm the edge repository meets the contract.
+-- Only the six required columns must be NOT NULL.
+-- Expected result: 0 violations.
+-- ================================================================
+
+SELECT 'NULL_CHECK' AS Validation
+ ,COUNT(*) AS Violations
+FROM {target}
+WHERE Src_Container_Name IS NULL
+ OR Src_Object_Name IS NULL
+ OR Src_Kind IS NULL
+ OR Tgt_Container_Name IS NULL
+ OR Tgt_Object_Name IS NULL
+ OR Tgt_Kind IS NULL
+;"""
+
+
+def _generate_view_ddl(db: str, name: str) -> str:
+    """
+    Generate a REPLACE VIEW template for user customisation.
+
+    Every source reference (YOUR_DATABASE.YOUR_LINEAGE_TABLE and the lin.*
+    columns) is a placeholder to replace with the actual lineage source.
+
+    Args:
+        db: Target database name.
+        name: Target view name.
+
+    Returns:
+        str: Teradata SQL script with placeholder source references.
+    """
+    return f"""-- ================================================================
+-- Graph Edge Contract — Edge Repository (VIEW)
+-- Generated by: Teradata MCP Server (Community Edition)
+-- Contract Version: 1.1
+--
+-- Customise the SELECT below to map your lineage source to the
+-- six required columns. The two optional enrichment columns
+-- (Edge_Relationship, Transformation_Type) are included as
+-- placeholders — map them or return NULL if not available.
+-- ================================================================
+
+REPLACE VIEW {db}.{name}
+(
+ Src_Container_Name
+ ,Src_Object_Name
+ ,Src_Kind
+ ,Tgt_Container_Name
+ ,Tgt_Object_Name
+ ,Tgt_Kind
+ -- Optional enrichment columns (NULL if not available in your source)
+ ,Edge_Relationship
+ ,Transformation_Type
+)
+AS
+LOCKING ROW FOR ACCESS
+SELECT
+ lin.SourceContainerName AS Src_Container_Name
+ ,lin.SourceObjectName AS Src_Object_Name
+ ,lin.SourceObjectKind AS Src_Kind
+ ,lin.TargetContainerName AS Tgt_Container_Name
+ ,lin.TargetObjectName AS Tgt_Object_Name
+ ,lin.TargetObjectKind AS Tgt_Kind
+ -- ============================================================
+ -- Map these to your actual columns, or use NULL if not available.
+ -- Examples:
+ -- lin.RelationshipType AS Edge_Relationship
+ -- lin.ProcessCategory AS Transformation_Type
+ -- ============================================================
+ ,CAST(NULL AS VARCHAR(50)) AS Edge_Relationship
+ ,CAST(NULL AS VARCHAR(50)) AS Transformation_Type
+FROM
+ -- ============================================================
+ -- Replace this with your actual lineage source.
+ -- Examples:
+ -- Your_DB.Your_Lineage_Table
+ -- A join across metadata tables
+ -- A UNION ALL of multiple lineage sources
+ -- {{ProductName}}_Observability.data_lineage (AI-Native Data Product)
+ -- ============================================================
+ YOUR_DATABASE.YOUR_LINEAGE_TABLE AS lin
+ -- Replace the lin.* placeholder columns above with your source columns.
+;
+
+COMMENT ON VIEW {db}.{name}
+ AS 'Graph Edge Contract v1.1 - edge repository view for Teradata MCP Server graph tools. 6 required columns + 2 optional enrichment columns (Edge_Relationship, Transformation_Type). Customise the source query to map your lineage data.'
+;"""
+
+
+# ──────────────────────────────────────────────────────────────────────────────── #
+# Tool registration descriptor #
+# ──────────────────────────────────────────────────────────────────────────────── #
+
+GRAPH_EDGE_CONTRACT_DDL_TOOL = {  # descriptor dict: tool name, handler callable, description, parameter schema
+ "name": "graph_edgeContractDDL",
+ "handler": handle_graph_edgeContractDDL,  # builds DDL text from templates; never touches its conn argument
+ "description": (
+ "Generate Teradata DDL for a Graph Edge Contract-conforming edge "
+ "repository table or view. Call this FIRST if you don't yet have an "
+ "edge repository — all other graph_* tools require one. "
+ "No database connection is used; DDL is returned as text ready to run. "
+ "TABLE output includes separate sample DML. "
+ "VIEW output generates a customisable template covering all 8 contract "
+ "columns: 6 required (Src_Container_Name, Src_Object_Name, Src_Kind, "
+ "Tgt_Container_Name, Tgt_Object_Name, Tgt_Kind) and 2 optional "
+ "enrichment columns (Edge_Relationship, Transformation_Type) for use "
+ "by graph visualisation tools. "
+ "AI-Native Data Product shortcut: if you have an Observability Module "
+ "(v1.5+), pass {ProductName}_Semantic.lineage_graph directly as "
+ "edge_repository — it already conforms to this contract. "
+ "Contract Version: 1.1."
+ ),
+ "parameters": {
+ "target_database": {
+ "type": "string",
+ "description": (
+ "Database in which to create the edge repository. "
+ "For AI-Native Data Products this is typically "
+ "{ProductName}_Semantic. "
+ "Example: 'StGeoMortgage_Semantic'."
+ ),
+ "required": True,  # the handler signature gives target_database no default
+ },
+ "object_name": {
+ "type": "string",
+ "description": ("Name for the edge table or view. Default: 'EdgeRepository'."),
+ "default": "EdgeRepository",  # same value as the handler signature default
+ },
+ "output_type": {
+ "type": "string",
+ "description": (
+ "'TABLE' (default): CREATE TABLE DDL + separate sample DML. "
+ "'VIEW': CREATE VIEW template for mapping an existing lineage source."
+ ),
+ "default": "TABLE",  # same value as the handler signature default
+ },
+ },
+}
diff --git a/src/teradata_mcp_server/tools/graph/graph_find_root_objects.py b/src/teradata_mcp_server/tools/graph/graph_find_root_objects.py
new file mode 100644
index 0000000..196aa39
--- /dev/null
+++ b/src/teradata_mcp_server/tools/graph/graph_find_root_objects.py
@@ -0,0 +1,481 @@
+"""
+graph_find_root_objects.py — Root object discovery tool.
+
+Provides handle_graph_findRootObjects and GRAPH_FIND_ROOT_OBJECTS_TOOL.
+Queries the edge repository directly (no SP) to find objects with no upstream
+dependencies — the ideal seed points for downstream impact analysis.
+
+Author: Paul Dancer — Teradata Global Field Tech
+"""
+
+import logging
+import time
+
+from teradatasql import TeradataConnection
+
+from teradata_mcp_server.tools.graph._graph_utils import parse_csv_patterns
+from teradata_mcp_server.tools.utils import create_response, rows_to_json
+
+logger = logging.getLogger("teradata_mcp_server")
+
+
+def handle_graph_findRootObjects(
+    conn: TeradataConnection,
+    container_pattern: str,
+    exclude_objects: str = "",
+    edge_repository: str = "",
+    object_types: str = "",
+    return_format: str = "detailed",
+    tool_name: str | None = None,
+    *args,
+    **kwargs,
+):
+    """
+    Find root objects (objects with no upstream dependencies) in specified containers.
+
+    Root objects are ideal starting points for downstream impact analysis as they
+    represent the foundational data sources that depend on nothing else.
+
+    Use this for:
+    - Finding starting points for downstream impact analysis
+    - Identifying source tables and base objects in data pipelines
+    - Discovering independent objects that can be safely analysed in isolation
+    - Understanding data flow origins in a schema or database
+    - Planning migration or refactoring by identifying foundation objects
+
+    Arguments:
+      container_pattern - str: Database/schema pattern(s) to search. SUPPORTS WILDCARDS (%) and CSV.
+                          At least one non-empty pattern is required.
+
+        IMPORTANT: This is a STRING parameter (type: str), not an array.
+        Pass multiple patterns as a single comma-separated string.
+        ✅ CORRECT: Pass as string: container_pattern="%WBC%,%StGeo%"
+        ❌ WRONG: Pass as array: container_pattern=["%WBC%", "%StGeo%"]
+
+        SINGLE CONTAINER:
+          'DEV01_StGeo_STD_T' - Specific database
+
+        WILDCARDS (%):
+          '%WBC%' - All databases containing WBC
+          'DEV01_%' - All databases starting with DEV01_
+          '%_STD_T' - All databases ending with _STD_T
+
+        MULTIPLE CONTAINERS (CSV format):
+          '%WBC%,%StGeo%' - All WBC and StGeo databases
+          'DEV01_StGeo_STD_T,DEV02_WBC_STD_T' - Specific databases
+          'DEV01_%,DEV02_%' - All DEV01 and DEV02 databases
+
+        WHITESPACE HANDLING:
+          Whitespace is automatically trimmed, so these are equivalent:
+          ✅ '%WBC%,%StGeo%' (no spaces)
+          ✅ '%WBC%, %StGeo%' (spaces after commas - OK)
+
+        HOW TO PASS IN CODE:
+          Python: container_pattern="%WBC%,%StGeo%"
+          JSON: {"container_pattern": "%WBC%,%StGeo%"}
+
+      exclude_objects - str: Comma-separated list of patterns to exclude (SERVER-SIDE filter).
+                        A pattern containing a dot is matched as Container.Object;
+                        any other pattern is matched against the container name only.
+
+        Common exclusion patterns:
+          'PRD_%,PROD_%' - Exclude production databases
+          '%.temp_%,%.bak_%' - Exclude temporary and backup objects
+          'DFJ%,C_D02%' - Exclude personal/sandbox schemas
+
+        Performance: Reduces result set and improves query time
+        Default: '' (empty string = no exclusions)
+
+      edge_repository - str: Edge repository table/view conforming to the Graph Edge Contract,
+                        given as a (dotted) object name. Required parameter — an empty
+                        or malformed name returns an error response.
+
+      object_types - str: Comma-separated list of object types to include (optional filter).
+        Examples: 'T' (tables), 'V' (views), 'P' (procedures), 'M' (macros)
+        Multiple: 'T,V' (tables and views only)
+        Empty = all object types included
+        Default: '' (all types)
+
+      return_format - str: Output format: 'detailed' or 'summary'
+        'detailed' (default): Full object list with metadata
+        'summary': High-level statistics and counts only
+        Default: 'detailed'
+
+      tool_name - str | None: Name reported in the response metadata.
+        Default: None (reported as 'graph_findRootObjects')
+
+    Returns:
+      ResponseType: formatted response with root objects + metadata.
+      Invalid arguments and query failures yield a response with status 'error'.
+
+    Example queries that trigger this tool:
+      - "Which objects in WBC and StGeo databases have no dependencies?"
+      - "Find root objects in DEV01 databases"
+      - "What are the starting points for impact analysis in StGeo?"
+      - "Show me base tables with no upstream dependencies"
+      - "Which objects should I start analysing for downstream impact?"
+
+    Example calls (repo stands for e.g. 'StGeoMortgage_Semantic.lineage_graph'):
+      # Find root objects in WBC and StGeo databases
+      handle_graph_findRootObjects(connection, "%WBC%,%StGeo%", edge_repository=repo)
+
+      # Find only root tables (no views/procedures)
+      handle_graph_findRootObjects(connection, "DEV01_%", object_types="T", edge_repository=repo)
+
+      # Find root objects excluding production and temporary objects
+      handle_graph_findRootObjects(connection, "%WBC%", exclude_objects="PRD_%,%.temp_%", edge_repository=repo)
+
+      # Quick summary of root objects
+      handle_graph_findRootObjects(connection, "DEV01_StGeo_STD_T", return_format="summary", edge_repository=repo)
+
+    Technical Implementation:
+      - Builds and runs one SELECT against the edge repository
+      - Selects sources in the scoped containers (Src_Container_Name LIKE each pattern)
+      - Keeps those that never appear as the target of an edge whose source is in scope
+        (NOT EXISTS), i.e. objects with no in-scope upstream dependency
+      - Applies exclude_objects and object_types as additional WHERE filters
+      - Embeds pattern and type values as SQL literals with single quotes doubled
+      - Orders results by DownstreamDependentCount (descending), then container and
+        object name
+    """
+    logger.debug(
+        "Tool: handle_graph_findRootObjects: Args: "
+        "container_pattern=%s, exclude_objects=%s, edge_repository=%s, "
+        "object_types=%s, return_format=%s",
+        container_pattern,
+        exclude_objects,
+        edge_repository,
+        object_types,
+        return_format,
+    )
+
+    resolved_tool_name = tool_name or "graph_findRootObjects"
+
+    if not edge_repository:
+        return create_response(
+            {"error": "edge_repository is required. Call graph_edgeContractDDL to generate one."},
+            {"tool_name": resolved_tool_name, "status": "error"},
+        )
+
+    import re  # function-scope import: the module's import block does not provide re
+
+    # edge_repository is spliced into the FROM clause as an identifier, so it cannot be
+    # escaped like a string literal: accept only dotted plain or double-quoted names.
+    edge_repository = edge_repository.strip()
+    name_part = r'(?:[\w$#]+|"[^"]+")'
+    if not re.fullmatch(rf"{name_part}(?:\.{name_part})*", edge_repository):
+        return create_response(
+            {"error": f"Invalid edge_repository name: {edge_repository!r}"},
+            {"tool_name": resolved_tool_name, "status": "error"},
+        )
+
+    try:
+        # Parse container patterns (CSV support)
+        container_patterns = parse_csv_patterns(container_pattern)
+        if not container_patterns:
+            # Without this guard the query would contain the invalid clause "WHERE ()".
+            return create_response(
+                {"error": "container_pattern must contain at least one non-empty pattern."},
+                {"tool_name": resolved_tool_name, "container_pattern": container_pattern, "status": "error"},
+            )
+
+        # Same scope test for the outer query (o1) and the NOT EXISTS subquery (o2).
+        outer_scope = _scope_like_clause("o1", container_patterns)
+        upstream_scope = _scope_like_clause("o2", container_patterns)
+
+        # Build exclusion conditions if provided
+        exclusion_conditions = []
+        for pattern in parse_csv_patterns(exclude_objects) if exclude_objects else []:
+            if "." in pattern:
+                # Fully qualified pattern like 'DB.Object'
+                db_part, obj_part = pattern.split(".", 1)
+                exclusion_conditions.append(
+                    f"(o1.Src_Container_Name LIKE {_quote_sql_literal(db_part)}"
+                    f" AND o1.Src_Object_Name LIKE {_quote_sql_literal(obj_part)})"
+                )
+            else:
+                # Database-only pattern like 'PRD_%'
+                exclusion_conditions.append(f"o1.Src_Container_Name LIKE {_quote_sql_literal(pattern)}")
+        exclusion_where = f" AND NOT ({' OR '.join(exclusion_conditions)})" if exclusion_conditions else ""
+
+        # Build object type filter if provided
+        type_list = [_quote_sql_literal(t.strip()) for t in (object_types or "").split(",") if t.strip()]
+        type_where = f" AND o1.Src_Kind IN ({','.join(type_list)})" if type_list else ""
+
+        # Root objects exist as sources but never as targets of an in-scope source.
+        # NOT EXISTS is used because it is more efficient than NOT IN for large datasets.
+        sql = f"""
+LOCKING ROW FOR ACCESS
+SELECT DISTINCT
+ o1.Src_Container_Name AS DatabaseName,
+ o1.Src_Object_Name AS ObjectName,
+ TRIM(o1.Src_Container_Name) || '.' || TRIM(o1.Src_Object_Name) AS FullyQualifiedName,
+ o1.Src_Kind AS ObjectType,
+ COUNT(DISTINCT o1.Tgt_Container_Name || '.' || o1.Tgt_Object_Name) AS DownstreamDependentCount
+FROM {edge_repository} o1
+WHERE ({outer_scope})
+ {exclusion_where}
+ {type_where}
+ AND NOT EXISTS (
+ SELECT 1
+ FROM {edge_repository} o2
+ WHERE o2.Tgt_Container_Name = o1.Src_Container_Name
+ AND o2.Tgt_Object_Name = o1.Src_Object_Name
+ AND ({upstream_scope})
+ )
+GROUP BY
+ o1.Src_Container_Name,
+ o1.Src_Object_Name,
+ o1.Src_Kind
+ORDER BY
+ DownstreamDependentCount DESC,
+ o1.Src_Container_Name,
+ o1.Src_Object_Name
+ """
+
+        logger.debug("Tool: handle_graph_findRootObjects: Executing SQL:\n%s", sql)
+
+        with conn.cursor() as cur:
+            start_time = time.time()
+            cur.execute(sql)
+            query_time = time.time() - start_time
+            logger.debug("Tool: handle_graph_findRootObjects: Query execution took %.2fs", query_time)
+
+            # NOTE: rows_to_json takes (description, rows) - description FIRST!
+            root_objects = rows_to_json(cur.description, cur.fetchall())
+            columns = [{"name": desc[0], "type": "str"} for desc in cur.description]
+
+        logger.debug("Tool: handle_graph_findRootObjects: Found %d root objects", len(root_objects))
+        if root_objects:
+            logger.debug("Tool: handle_graph_findRootObjects: First object: %s", root_objects[0])
+
+        # Safety check: ensure root_objects is a list of dicts, not a string
+        if not isinstance(root_objects, list):
+            logger.error(
+                "Tool: handle_graph_findRootObjects: root_objects is not a list — type: %s", type(root_objects)
+            )
+            root_objects = []
+
+        # Format results based on return_format
+        if return_format == "summary":
+            formatted_data = _format_root_summary(root_objects, container_pattern)
+        else:  # detailed
+            formatted_data = {
+                "root_objects": root_objects,
+                "summary": _create_root_summary_stats(root_objects, container_pattern),
+            }
+
+        # Build metadata
+        metadata = {
+            "tool_name": resolved_tool_name,
+            "container_pattern": container_pattern,
+            "exclude_objects": exclude_objects,
+            "object_types": object_types,
+            "edge_repository": edge_repository,
+            "return_format": return_format,
+            "sql": sql,
+            "columns": columns,
+            "row_count": len(root_objects),
+            "status": "success",
+        }
+
+        logger.debug("Tool: handle_graph_findRootObjects: metadata: %s", metadata)
+        return create_response(formatted_data, metadata)
+
+    except Exception as e:
+        logger.error("Tool: handle_graph_findRootObjects: Error: %s", e, exc_info=True)
+        return create_response(
+            {"error": str(e)},
+            {
+                "tool_name": resolved_tool_name,
+                "container_pattern": container_pattern,
+                "status": "error",
+            },
+        )
+
+
+def _quote_sql_literal(value: str) -> str:
+    """Return value as a single-quoted SQL literal, doubling any embedded single quotes."""
+    return "'" + value.replace("'", "''") + "'"
+
+
+def _scope_like_clause(alias: str, patterns: list[str]) -> str:
+    """OR together one '<alias>.Src_Container_Name LIKE <pattern>' test per pattern."""
+    return " OR ".join(f"{alias}.Src_Container_Name LIKE {_quote_sql_literal(p)}" for p in patterns)
+
+
+def _create_root_summary_stats(root_objects: list, container_pattern: str) -> dict:
+ """
+ Create summary statistics for root objects analysis.
+
+ Arguments:
+ root_objects - List of root object dictionaries
+ container_pattern - Container pattern(s) searched
+
+ Returns:
+ Dictionary with summary statistics
+ """
+ # Count by object type
+ type_counts: dict[str, int] = {}
+ for obj in root_objects:
+ obj_type = obj.get("ObjectType", "Unknown")
+ type_counts[obj_type] = type_counts.get(obj_type, 0) + 1
+
+ # Count by database
+ db_counts: dict[str, int] = {}
+ for obj in root_objects:
+ db_name = obj.get("DatabaseName", "Unknown")
+ db_counts[db_name] = db_counts.get(db_name, 0) + 1
+
+ # Calculate total downstream dependencies
+ total_downstream = sum(
+ int(obj.get("DownstreamDependentCount", 0))
+ if isinstance(obj.get("DownstreamDependentCount"), str)
+ else obj.get("DownstreamDependentCount", 0)
+ for obj in root_objects
+ )
+
+ # Find objects with most downstream dependencies
+ top_objects = sorted(
+ root_objects,
+ key=lambda x: (
+ int(x.get("DownstreamDependentCount", 0))
+ if isinstance(x.get("DownstreamDependentCount"), str)
+ else x.get("DownstreamDependentCount", 0)
+ ),
+ reverse=True,
+ )[:10]
+
+ return {
+ "total_root_objects": len(root_objects),
+ "container_pattern": container_pattern,
+ "object_type_counts": type_counts,
+ "database_counts": db_counts,
+ "total_downstream_dependencies": total_downstream,
+ "average_downstream_per_root": round(total_downstream / len(root_objects), 2) if root_objects else 0,
+ "top_impact_objects": [
+ {
+ "name": obj.get("FullyQualifiedName"),
+ "type": obj.get("ObjectType"),
+ "downstream_count": obj.get("DownstreamDependentCount"),
+ }
+ for obj in top_objects
+ ],
+ }
+
+
+def _format_root_summary(root_objects: list, container_pattern: str) -> dict:
+ """
+ Format a concise summary of root objects analysis.
+
+ Arguments:
+ root_objects - List of root object dictionaries
+ container_pattern - Container pattern(s) searched
+
+ Returns:
+ Dictionary with formatted summary
+ """
+ stats = _create_root_summary_stats(root_objects, container_pattern)
+
+ summary_text = f"""
+ROOT OBJECTS ANALYSIS SUMMARY
+{"=" * 60}
+
+Container Pattern(s): {container_pattern}
+
+OVERVIEW
+ Total Root Objects Found: {stats["total_root_objects"]}
+ Total Downstream Impact: {stats["total_downstream_dependencies"]} objects
+ Avg Downstream per Root: {stats["average_downstream_per_root"]}
+
+DEFINITION
+ Root objects are objects with NO upstream dependencies.
+ They represent foundational data sources and are ideal
+ starting points for downstream impact analysis.
+"""
+
+ if stats["object_type_counts"]:
+ summary_text += "\nBY OBJECT TYPE\n"
+ for obj_type, count in sorted(stats["object_type_counts"].items(), key=lambda x: x[1], reverse=True):
+ summary_text += f" {obj_type:20s} {count:3d}\n"
+
+ if stats["database_counts"]:
+ summary_text += "\nBY DATABASE\n"
+ for db_name, count in sorted(stats["database_counts"].items(), key=lambda x: x[1], reverse=True)[:10]:
+ summary_text += f" {db_name:40s} {count:3d}\n"
+
+ if len(stats["database_counts"]) > 10:
+ summary_text += f" ... and {len(stats['database_counts']) - 10} more databases\n"
+
+ if stats["top_impact_objects"]:
+ summary_text += "\nTOP 10 ROOT OBJECTS BY DOWNSTREAM IMPACT\n"
+ for i, obj in enumerate(stats["top_impact_objects"], 1):
+ summary_text += f" {i:2d}. {obj['name']:50s} ({obj['type']}) → {obj['downstream_count']} dependents\n"
+
+ summary_text += """
+RECOMMENDATION
+ Start your downstream impact analysis with the objects listed above,
+ particularly those with higher downstream dependent counts, as they
+ represent foundational objects with broader impact scope.
+"""
+
+ return {
+ "summary_text": summary_text,
+ "statistics": stats,
+ "root_object_names": [obj.get("FullyQualifiedName") for obj in root_objects],
+ }
+
+
+# ------------------------------------------------------------------
+# Tool registration descriptor
+#
+# Plain dict describing the tool: its public name, the handler callable,
+# the agent-facing description, and the parameter schema.
+# container_pattern and edge_repository are flagged "required"; the
+# remaining parameters carry a "default" instead.
+# ------------------------------------------------------------------
+GRAPH_FIND_ROOT_OBJECTS_TOOL = {
+ "name": "graph_findRootObjects",
+ "handler": handle_graph_findRootObjects,
+ "description": (
+ "Find root objects — objects with no upstream dependencies — in the "
+ "specified containers. Root objects are foundational data sources and "
+ "ideal starting points for downstream impact analysis or migration wave "
+ "planning. Results are ordered by downstream dependent count descending. "
+ "Use graph_bfsLevels after this tool to compute hop distances from the "
+ "identified root objects. "
+ "Requires an edge repository conforming to the Graph Edge Contract. "
+ "If you don't have one yet, call graph_edgeContractDDL first to "
+ "generate the CREATE TABLE or CREATE VIEW DDL."
+ ),
+ "parameters": {
+ "container_pattern": {
+ "type": "string",
+ "description": (
+ "CSV LIKE patterns for databases/schemas to search. Supports wildcards: '%WBC%' or '%WBC%,%StGeo%'."
+ ),
+ "required": True,
+ },
+ "exclude_objects": {
+ "type": "string",
+ "description": ("CSV of FQ object name LIKE patterns to exclude. Example: 'PRD_%,%.temp_%'. Default: ''."),
+ "default": "",
+ },
+ "edge_repository": {
+ "type": "string",
+ "description": (
+ "Edge repository table or view conforming to the Graph Edge Contract. "
+ "Call graph_edgeContractDDL to generate one if needed. "
+ "Required parameter — no default."
+ ),
+ "required": True,
+ },
+ "object_types": {
+ "type": "string",
+ "description": (
+ "CSV of object type codes to include. Example: 'Table' or 'Table,View'. Default: '' (all types)."
+ ),
+ "default": "",
+ },
+ "return_format": {
+ "type": "string",
+ "description": "Output format: 'detailed' (default) or 'summary'.",
+ "default": "detailed",
+ },
+ },
+}
diff --git a/src/teradata_mcp_server/tools/graph/graph_tools.py b/src/teradata_mcp_server/tools/graph/graph_tools.py
new file mode 100644
index 0000000..39c9fb8
--- /dev/null
+++ b/src/teradata_mcp_server/tools/graph/graph_tools.py
@@ -0,0 +1,180 @@
+"""
+graph_tools.py — Registration aggregator for graph analysis tools.
+
+──────────────────────────────────────────────────────────────────────
+WHY THIS FILE EXISTS AND WHY IT IS STRUCTURED THIS WAY
+──────────────────────────────────────────────────────────────────────
+
+This file is intentionally a THIN HUB. It contains no handler logic,
+no SQL, and no business rules. Its only job is to import handlers and
+descriptors from the individual tool modules in the graph/ sub-package
+and expose them as a single GRAPH_TOOLS list for MCP server registration.
+
+This structure was adopted for the following reasons:
+
+1. VERSION CONTROL
+ Each tool lives in its own file. A git diff for a bug fix or feature
+ change touches exactly one tool file — not a 2,000+ line monolith.
+ PR reviews are scoped. Blame history is meaningful. Bisecting a
+ regression is straightforward.
+
+2. INDEPENDENT DEPLOYMENT
+ A hotfix to graph_bfsLevels can be deployed by copying one file.
+ There is no risk of inadvertently shipping changes to other tools
+ alongside an unrelated fix.
+
+3. PARALLEL DEVELOPMENT
+ Multiple engineers can work on different tools simultaneously without
+ merge conflicts. Separate files eliminate the constant collision source
+ that a shared monolith creates.
+
+4. TESTABILITY
+ Each tool file can be unit-tested in isolation. A test for
+ graph_bfsLevels only needs to import that one module and mock the
+ connection — it does not pull in other tools, their imports, or their
+ dependencies.
+
+5. SEPARATION OF CONCERNS
+ Tool logic, shared utilities, and server registration are three
+ distinct concerns. They now live in three distinct places:
+ graph/graph_<tool_name>.py — handler logic + descriptor
+ graph/_graph_utils.py — shared BFS helpers (internal, not a tool)
+ graph_tools.py — this file: registration only
+
+──────────────────────────────────────────────────────────────────────
+PACKAGE STRUCTURE
+──────────────────────────────────────────────────────────────────────
+
+ teradata_mcp_server/tools/
+ ├── graph/
+ │   ├── __init__.py
+ │   ├── graph_tools.py                 ← YOU ARE HERE (hub only)
+ │   ├── _graph_utils.py                ← shared helpers (bfs_safe_int,
+ │   │                                     create_bfs_summary,
+ │   │                                     extract_cycle_candidates)
+ │   ├── graph_trace_lineage.py         ← hybrid: Python CTEs, server-side traversal
+ │   ├── graph_find_root_objects.py     ← SQL-only root object discovery
+ │   ├── graph_detect_cycles.py         ← Python: Union-Find + iterative DFS
+ │   ├── graph_connected_components.py  ← Python: Union-Find WCC analysis
+ │   ├── graph_bfs_levels.py            ← Python BFS (no SP dependency)
+ │   ├── graph_edge_contract.py         ← edge repository DDL generation
+ │   └── graph_analyse_database.py      ← composite single-fetch analysis
+ └── utils.py                           ← shared MCP utilities (create_response etc.)
+
+──────────────────────────────────────────────────────────────────────
+ADDING A NEW TOOL
+──────────────────────────────────────────────────────────────────────
+
+ 1. Create graph/graph_<tool_name>.py following the existing module
+ pattern (module docstring, imports, handler, descriptor constant).
+ 2. Import the handler and descriptor here (two lines below).
+ 3. Add the descriptor to GRAPH_TOOLS (one line below).
+ 4. Create tests/tools/graph/test_graph_<tool_name>.py.
+
+Nothing else changes — the MCP server consumes GRAPH_TOOLS unchanged.
+
+──────────────────────────────────────────────────────────────────────
+SP-FREE ARCHITECTURE — ALL TOOLS
+──────────────────────────────────────────────────────────────────────
+
+All graph tools in this package are free of stored procedure (SP)
+dependencies. No Teradata DDL objects are required beyond read access
+to the edge repository view/table. The implementation strategies are:
+
+ graph_findRootObjects
+ Pure SQL SELECT — NOT EXISTS subquery identifies objects with no
+ upstream dependencies. No Python algorithm required.
+
+ graph_bfsLevels
+ Pure Python — one bulk edge SELECT, then standard queue-based BFS
+ (O(V+E)) in the MCP server process. Replaced an SP-based
+ Bellman-Ford SQL relaxation loop.
+
+ graph_detectCycles
+ Pure Python — one scoped edge SELECT, then Union-Find WCC
+ partitioning followed by iterative DFS (grey/black colouring).
+ Iterative DFS avoids Python's recursion limit on deep graphs.
+
+ graph_connectedComponents
+ Pure Python — one scoped edge SELECT, then path-compressed
+ Union-Find assigns every node to a component in O(α·N) time.
+
+ graph_traceLineage
+ Hybrid — Python constructs Teradata recursive CTEs and executes
+ them as plain SELECT statements. The recursive traversal runs
+ entirely in Teradata spool (server-side), returning only the
+ reachable subgraph across the network. Python owns orchestration,
+ deduplication, and response assembly. This approach avoids
+ transferring the full edge table when only a small subgraph is
+ needed — critical at scale (100 000+ edges).
+
+The only Teradata privilege required across all tools is SELECT on
+the edge_repository view/table.
+
+──────────────────────────────────────────────────────────────────────
+"""
+
+import logging
+
+from teradata_mcp_server.tools.graph.graph_analyse_database import (
+ GRAPH_ANALYSE_DATABASE_TOOL,
+ handle_graph_analyseDatabase,
+)
+from teradata_mcp_server.tools.graph.graph_bfs_levels import (
+ GRAPH_BFS_LEVELS_TOOL,
+ handle_graph_bfsLevels,
+)
+from teradata_mcp_server.tools.graph.graph_connected_components import (
+ GRAPH_CONNECTED_COMPONENTS_TOOL,
+ handle_graph_connectedComponents,
+)
+from teradata_mcp_server.tools.graph.graph_detect_cycles import (
+ GRAPH_DETECT_CYCLES_TOOL,
+ handle_graph_detectCycles,
+)
+from teradata_mcp_server.tools.graph.graph_edge_contract import (
+ GRAPH_EDGE_CONTRACT_DDL_TOOL,
+ handle_graph_edgeContractDDL,
+)
+
+# ── Individual tool imports ────────────────────────────────────────────────
+#
+# Each import pair brings in:
+# handle_* — the callable handler passed to the MCP framework
+# *_TOOL — the descriptor dict (name, handler ref, description, parameters)
+#
+# Imports are listed alphabetically by module name. The logical workflow order
+# (edgeContractDDL → findRootObjects → bfsLevels → traceLineage → detectCycles →
+# connectedComponents → analyseDatabase) is expressed by GRAPH_TOOLS below.
+from teradata_mcp_server.tools.graph.graph_find_root_objects import (
+ GRAPH_FIND_ROOT_OBJECTS_TOOL,
+ handle_graph_findRootObjects,
+)
+from teradata_mcp_server.tools.graph.graph_trace_lineage import (
+ GRAPH_TRACE_LINEAGE_TOOL,
+ handle_graph_traceLineage,
+)
+
+logger = logging.getLogger("teradata_mcp_server")
+
+# ── Tool registry ──────────────────────────────────────────────────────────
+#
+# GRAPH_TOOLS is the single list consumed by the MCP server at startup.
+# The server iterates this list and registers each tool's name, handler,
+# and parameter schema with the MCP protocol layer.
+#
+# Order here controls the order tools appear in MCP tool listings.
+# Workflow order (edge contract DDL → roots → BFS → lineage → cycles →
+# components → composite analysis) makes the listing intuitive for both
+# humans and AI agents.
+#
+# To disable a tool temporarily: comment out its entry here.
+# To add a new tool: append its descriptor (see ADDING A NEW TOOL above).
+
+GRAPH_TOOLS = [
+ GRAPH_EDGE_CONTRACT_DDL_TOOL, # Step 0 — generate edge repository DDL
+ GRAPH_FIND_ROOT_OBJECTS_TOOL, # Step 1 — discover seed objects
+ GRAPH_BFS_LEVELS_TOOL, # Step 2 — wave planning + blast radius
+ GRAPH_TRACE_LINEAGE_TOOL, # Step 3 — full lineage + impact paths
+ GRAPH_DETECT_CYCLES_TOOL, # Step 4 — cycle validation
+ GRAPH_CONNECTED_COMPONENTS_TOOL, # Step 5 — graph partitioning
+ GRAPH_ANALYSE_DATABASE_TOOL, # Step 6 — composite single-fetch analysis
+]
+
+# Emitted once, when this module is first imported.
+logger.debug("graph_tools: registered %d tools: %s", len(GRAPH_TOOLS), [t["name"] for t in GRAPH_TOOLS])
diff --git a/src/teradata_mcp_server/tools/graph/graph_trace_lineage.py b/src/teradata_mcp_server/tools/graph/graph_trace_lineage.py
new file mode 100644
index 0000000..fadddd5
--- /dev/null
+++ b/src/teradata_mcp_server/tools/graph/graph_trace_lineage.py
@@ -0,0 +1,807 @@
+"""
+graph_trace_lineage.py — Dependency lineage analysis tool.
+
+Provides handle_graph_traceLineage and GRAPH_TRACE_LINEAGE_TOOL.
+
+Hybrid implementation — no stored procedure required.
+
+Design:
+ Python constructs and executes parameterised Teradata recursive CTEs as plain
+ SELECT statements. The recursive traversal runs entirely in Teradata spool
+ (server-side), so only the reachable subgraph crosses the network — not the
+ full edge table. Python owns all orchestration, filtering, response assembly,
+ and format selection.
+
+ This approach satisfies two competing constraints simultaneously:
+ 1. No stored procedure — no Teradata DDL, no REPLACE PROCEDURE privilege,
+ no server-side objects to deploy or version.
+ 2. No full-table transfer at scale — a graph with 100 000 edges is queried
+ with only the reachable subgraph returned per invocation.
+
+Recursive CTE direction convention (matches Edge Repository / graph_bfsLevels):
+ Edge Repository row: Src is REFERENCED BY Tgt.
+ => Src is the DEPENDENCY (upstream of Tgt).
+ => Tgt is the DEPENDENT (downstream of Src).
+
+ Upstream CTE — "what does my seed depend on?":
+ Anchor on seed as Tgt; recurse by following Src side outward.
+
+ Downstream CTE — "what depends on my seed?":
+ Anchor on seed as Src; recurse by following Tgt side outward.
+
+Author: Paul Dancer — Teradata Global Field Tech
+"""
+
+import logging
+
+from teradatasql import TeradataConnection
+
+from teradata_mcp_server.tools.graph._graph_utils import parse_csv_patterns
+from teradata_mcp_server.tools.utils import create_response, rows_to_json
+
+logger = logging.getLogger("teradata_mcp_server")
+
+
+# ---------------------------------------------------------------------------
+# Internal helpers — pattern parsing
+# ---------------------------------------------------------------------------
+# parse_csv_patterns is imported from _graph_utils.
+# _build_or_like is kept local — it covers both Src and Tgt columns
+# simultaneously, which is a different pattern from build_like_or.
+
+
+def _build_or_like(patterns: list[str], src_col: str, tgt_col: str) -> str:
+ """
+ Build an OR-joined pair of LIKE clauses covering both Src and Tgt columns.
+
+ Used to scope the recursive CTE anchor and recursion steps so that only
+ edges touching the requested containers participate.
+
+ Arguments:
+ patterns - List of LIKE pattern strings for container names
+ src_col - SQL column name for the source container
+ tgt_col - SQL column name for the target container
+
+ Returns:
+ SQL fragment, e.g.
+ "({src_col} LIKE 'A%' OR {tgt_col} LIKE 'A%'
+ OR {src_col} LIKE 'B%' OR {tgt_col} LIKE 'B%')"
+ Returns empty string if patterns is empty (no filtering).
+ """
+ if not patterns:
+ return ""
+ clauses = []
+ for p in patterns:
+ clauses.append(f"{src_col} LIKE '{p}'")
+ clauses.append(f"{tgt_col} LIKE '{p}'")
+ return "AND (" + " OR ".join(clauses) + ")"
+
+
+def _build_excl_fragment(patterns: list[str], db_col: str, obj_col: str) -> str:
+ """
+ Build a NOT (...) exclusion fragment for object-level filtering.
+
+ A pattern containing a dot is treated as a fully-qualified DB.Object
+ pattern; a plain pattern is matched against the container/DB column only.
+
+ Arguments:
+ patterns - List of exclusion LIKE patterns
+ db_col - SQL column holding the database/schema name
+ obj_col - SQL column holding the object name
+
+ Returns:
+ SQL fragment beginning with "AND NOT (...)" or empty string
+ """
+ if not patterns:
+ return ""
+
+ conditions = []
+ for p in patterns:
+ if "." in p:
+ db_part, obj_part = p.split(".", 1)
+ conditions.append(f"({db_col} LIKE '{db_part}' AND {obj_col} LIKE '{obj_part}')")
+ else:
+ conditions.append(f"{db_col} LIKE '{p}'")
+
+ return "AND NOT (" + " OR ".join(conditions) + ")"
+
+
+# ---------------------------------------------------------------------------
+# CTE builders
+# ---------------------------------------------------------------------------
+
+
+def _build_upstream_cte(
+ seed_pattern: str,
+ max_depth: int,
+ edge_table: str,
+ incl_fragment: str,
+ excl_fragment: str,
+) -> str:
+ """
+ Build a Teradata recursive CTE that traverses upstream from a seed pattern.
+
+ "Upstream" means: what does my seed DEPEND ON? In Edge Repository terms,
+ when a row has Tgt matching the seed, Src is the upstream dependency.
+ The anchor selects rows where Tgt matches the seed; recursion follows
+ the Src side outward (each discovered Src becomes the next Tgt to search).
+
+ Arguments:
+ seed_pattern - LIKE pattern for the seed object (DB.Object format)
+ max_depth - Maximum hop count to traverse
+ edge_table - Fully-qualified edge repository view/table name
+ incl_fragment - SQL fragment for container inclusion ("AND (...)") or ''
+ excl_fragment - SQL fragment for object exclusion ("AND NOT (...)") or ''
+
+ Returns:
+ Complete WITH RECURSIVE ... SELECT statement as a string
+
+ Notes:
+ - seed_pattern, max_depth, edge_table and both fragments are placed into
+ the SQL text by f-string interpolation; nothing is escaped here.
+ NOTE(review): callers are presumably expected to validate these
+ values first — confirm.
+ - The same incl_fragment / excl_fragment pair is applied to both the
+ anchor and the recursive step.
+ - In the final SELECT each discovered upstream object (the Src side of
+ an edge) is exposed under the DependentObject* aliases and the Tgt
+ side under ReferencedObject*.
+ - Depth is returned negated (-1 for the first hop, -2 for the next, ...).
+ - DependencyPath is built with ' <- ' separators and cast to VARCHAR(8000).
+ """
+ return f"""
+WITH RECURSIVE UpstreamBFS
+ (
+ Src_DB
+ ,Src_Obj
+ ,Src_Kind
+ ,Tgt_DB
+ ,Tgt_Obj
+ ,Tgt_Kind
+ ,Depth
+ ,Path_Str
+ ) AS
+(
+ -- ----------------------------------------------------------------
+ -- Anchor: edges where the target matches the seed pattern
+ -- ----------------------------------------------------------------
+ SELECT
+ TRIM(e.Src_Container_Name)
+ ,TRIM(e.Src_Object_Name)
+ ,COALESCE(TRIM(e.Src_Kind), 'Unknown')
+ ,TRIM(e.Tgt_Container_Name)
+ ,TRIM(e.Tgt_Object_Name)
+ ,COALESCE(TRIM(e.Tgt_Kind), 'Unknown')
+ ,CAST(1 AS INTEGER)
+ ,CAST(
+ TRIM(e.Src_Container_Name) || '.' || TRIM(e.Src_Object_Name)
+ || ' <- '
+ || TRIM(e.Tgt_Container_Name) || '.' || TRIM(e.Tgt_Object_Name)
+ AS VARCHAR(8000)
+ )
+ FROM {edge_table} e
+ WHERE (TRIM(e.Tgt_Container_Name) || '.' || TRIM(e.Tgt_Object_Name))
+ LIKE '{seed_pattern}'
+ {incl_fragment}
+ {excl_fragment}
+
+ UNION ALL
+
+ -- ----------------------------------------------------------------
+ -- Recursion: follow the Src side of each already-discovered edge
+ -- ----------------------------------------------------------------
+ SELECT
+ TRIM(e.Src_Container_Name)
+ ,TRIM(e.Src_Object_Name)
+ ,COALESCE(TRIM(e.Src_Kind), 'Unknown')
+ ,TRIM(e.Tgt_Container_Name)
+ ,TRIM(e.Tgt_Object_Name)
+ ,COALESCE(TRIM(e.Tgt_Kind), 'Unknown')
+ ,b.Depth + 1
+ ,CAST(
+ TRIM(e.Src_Container_Name) || '.' || TRIM(e.Src_Object_Name)
+ || ' <- '
+ || b.Path_Str
+ AS VARCHAR(8000)
+ )
+ FROM {edge_table} e
+ INNER JOIN UpstreamBFS b
+ ON TRIM(e.Tgt_Container_Name) = b.Src_DB
+ AND TRIM(e.Tgt_Object_Name) = b.Src_Obj
+ WHERE b.Depth < {max_depth}
+ {incl_fragment}
+ {excl_fragment}
+)
+SELECT
+ Src_DB AS DependentObjectDBName
+ ,Src_Obj AS DependentObjectName
+ ,Src_DB || '.' || Src_Obj AS FQDependentObjectName
+ ,Tgt_DB AS ReferencedObjectDBName
+ ,Tgt_Obj AS ReferencedObjectName
+ ,Tgt_DB || '.' || Tgt_Obj AS FQReferencedObjectName
+ ,Src_Kind AS Src_Kind
+ ,Tgt_Kind AS Tgt_Kind
+ ,CAST(Depth * -1 AS INTEGER) AS Depth
+ ,Path_Str AS DependencyPath
+FROM UpstreamBFS
+ORDER BY Depth ASC, FQDependentObjectName
+"""
+
+
+def _build_downstream_cte(
+ seed_pattern: str,
+ max_depth: int,
+ edge_table: str,
+ incl_fragment: str,
+ excl_fragment: str,
+) -> str:
+ """
+ Build a Teradata recursive CTE that traverses downstream from a seed pattern.
+
+ "Downstream" means: what DEPENDS ON my seed? In Edge Repository terms,
+ when a row has Src matching the seed, Tgt is the downstream dependent.
+ The anchor selects rows where Src matches the seed; recursion follows
+ the Tgt side outward (each discovered Tgt becomes the next Src to search).
+
+ Arguments:
+ seed_pattern - LIKE pattern for the seed object (DB.Object format)
+ max_depth - Maximum hop count to traverse
+ edge_table - Fully-qualified edge repository view/table name
+ incl_fragment - SQL fragment for container inclusion ("AND (...)") or ''
+ excl_fragment - SQL fragment for object exclusion ("AND NOT (...)") or ''
+
+ Returns:
+ Complete WITH RECURSIVE ... SELECT statement as a string
+
+ Notes:
+ - seed_pattern, max_depth, edge_table and both fragments are placed into
+ the SQL text by f-string interpolation; nothing is escaped here.
+ NOTE(review): callers are presumably expected to validate these
+ values first — confirm.
+ - The same incl_fragment / excl_fragment pair is applied to both the
+ anchor and the recursive step.
+ - In the final SELECT each discovered downstream object (the Tgt side
+ of an edge) is exposed under the DependentObject* aliases and the
+ Src side under ReferencedObject*.
+ - Depth is returned as a positive hop count (1 for the first hop).
+ - DependencyPath is built with ' -> ' separators and cast to VARCHAR(8000).
+ """
+ return f"""
+WITH RECURSIVE DownstreamBFS
+ (
+ Src_DB
+ ,Src_Obj
+ ,Src_Kind
+ ,Tgt_DB
+ ,Tgt_Obj
+ ,Tgt_Kind
+ ,Depth
+ ,Path_Str
+ ) AS
+(
+ -- ----------------------------------------------------------------
+ -- Anchor: edges where the source matches the seed pattern
+ -- ----------------------------------------------------------------
+ SELECT
+ TRIM(e.Src_Container_Name)
+ ,TRIM(e.Src_Object_Name)
+ ,COALESCE(TRIM(e.Src_Kind), 'Unknown')
+ ,TRIM(e.Tgt_Container_Name)
+ ,TRIM(e.Tgt_Object_Name)
+ ,COALESCE(TRIM(e.Tgt_Kind), 'Unknown')
+ ,CAST(1 AS INTEGER)
+ ,CAST(
+ TRIM(e.Src_Container_Name) || '.' || TRIM(e.Src_Object_Name)
+ || ' -> '
+ || TRIM(e.Tgt_Container_Name) || '.' || TRIM(e.Tgt_Object_Name)
+ AS VARCHAR(8000)
+ )
+ FROM {edge_table} e
+ WHERE (TRIM(e.Src_Container_Name) || '.' || TRIM(e.Src_Object_Name))
+ LIKE '{seed_pattern}'
+ {incl_fragment}
+ {excl_fragment}
+
+ UNION ALL
+
+ -- ----------------------------------------------------------------
+ -- Recursion: follow the Tgt side of each already-discovered edge
+ -- ----------------------------------------------------------------
+ SELECT
+ TRIM(e.Src_Container_Name)
+ ,TRIM(e.Src_Object_Name)
+ ,COALESCE(TRIM(e.Src_Kind), 'Unknown')
+ ,TRIM(e.Tgt_Container_Name)
+ ,TRIM(e.Tgt_Object_Name)
+ ,COALESCE(TRIM(e.Tgt_Kind), 'Unknown')
+ ,b.Depth + 1
+ ,CAST(
+ b.Path_Str
+ || ' -> '
+ || TRIM(e.Tgt_Container_Name) || '.' || TRIM(e.Tgt_Object_Name)
+ AS VARCHAR(8000)
+ )
+ FROM {edge_table} e
+ INNER JOIN DownstreamBFS b
+ ON TRIM(e.Src_Container_Name) = b.Tgt_DB
+ AND TRIM(e.Src_Object_Name) = b.Tgt_Obj
+ WHERE b.Depth < {max_depth}
+ {incl_fragment}
+ {excl_fragment}
+)
+SELECT
+ Tgt_DB AS DependentObjectDBName
+ ,Tgt_Obj AS DependentObjectName
+ ,Tgt_DB || '.' || Tgt_Obj AS FQDependentObjectName
+ ,Src_DB AS ReferencedObjectDBName
+ ,Src_Obj AS ReferencedObjectName
+ ,Src_DB || '.' || Src_Obj AS FQReferencedObjectName
+ ,Src_Kind AS Src_Kind
+ ,Tgt_Kind AS Tgt_Kind
+ ,CAST(Depth AS INTEGER) AS Depth
+ ,Path_Str AS DependencyPath
+FROM DownstreamBFS
+ORDER BY Depth ASC, FQDependentObjectName
+"""
+
+
+# ---------------------------------------------------------------------------
+# Node / summary helpers — identical contract to the SP-based version
+# ---------------------------------------------------------------------------
+
+
+def _safe_int(value) -> int:
+ """
+ Safely convert a value to int, returning 0 on failure.
+
+ Arguments:
+ value - Any value (may be Teradata BYTEINT returned as string)
+
+ Returns:
+ int
+ """
+ try:
+ return int(value) if value is not None else 0
+ except (ValueError, TypeError):
+ return 0
+
+
+def _derive_nodes_from_edges(
+ edges_up: list[dict],
+ edges_down: list[dict],
+) -> list[dict]:
+ """
+ Derive unique nodes from edge lists.
+
+ Deduplicates by FQDependentObjectName, preferring the upstream record when
+ a node appears in both directions.
+
+ Arguments:
+ edges_up - List of upstream edge dicts
+ edges_down - List of downstream edge dicts
+
+ Returns:
+ List of unique node dicts
+ """
+ nodes: dict[str, dict] = {}
+
+ for edge in edges_up:
+ fq = edge.get("FQDependentObjectName")
+ if fq and fq not in nodes:
+ nodes[fq] = {
+ "FQDependentObjectName": fq,
+ "DependentObjectDBName": edge.get("DependentObjectDBName"),
+ "DependentObjectName": edge.get("DependentObjectName"),
+ "Direction": "Upstream",
+ "Depth": _safe_int(edge.get("Depth", 0)),
+ "ObjectType": edge.get("Src_Kind") or edge.get("Tgt_Kind"),
+ }
+
+ for edge in edges_down:
+ fq = edge.get("FQDependentObjectName")
+ if fq and fq not in nodes:
+ nodes[fq] = {
+ "FQDependentObjectName": fq,
+ "DependentObjectDBName": edge.get("DependentObjectDBName"),
+ "DependentObjectName": edge.get("DependentObjectName"),
+ "Direction": "Downstream",
+ "Depth": _safe_int(edge.get("Depth", 0)),
+ "ObjectType": edge.get("Src_Kind") or edge.get("Tgt_Kind"),
+ }
+
+ return list(nodes.values())
+
+
+def _create_summary_stats(
+ nodes: list[dict],
+ edges_up: list[dict],
+ edges_down: list[dict],
+) -> dict:
+ """
+ Create summary statistics from dependency data.
+
+ Arguments:
+ nodes - List of node dicts
+ edges_up - List of upstream edge dicts
+ edges_down - List of downstream edge dicts
+
+ Returns:
+ Dictionary of summary statistics
+ """
+ upstream_nodes = [n for n in nodes if n.get("Direction") == "Upstream"]
+ downstream_nodes = [n for n in nodes if n.get("Direction") == "Downstream"]
+
+ type_counts: dict[str, int] = {}
+ for node in nodes:
+ kind = node.get("ObjectType", "Unknown") or "Unknown"
+ type_counts[kind] = type_counts.get(kind, 0) + 1
+
+ return {
+ "total_nodes": len(nodes),
+ "upstream_nodes": len(upstream_nodes),
+ "downstream_nodes": len(downstream_nodes),
+ "total_edges": len(edges_up) + len(edges_down),
+ "upstream_edges": len(edges_up),
+ "downstream_edges": len(edges_down),
+ "max_depth_upstream": max((abs(_safe_int(n.get("Depth", 0))) for n in upstream_nodes), default=0),
+ "max_depth_downstream": max((_safe_int(n.get("Depth", 0)) for n in downstream_nodes), default=0),
+ "object_type_counts": type_counts,
+ }
+
+
+def _format_summary(
+ nodes: list[dict],
+ edges_up: list[dict],
+ edges_down: list[dict],
+ object_name: str,
+) -> dict:
+ """
+ Format a concise summary of dependency analysis.
+
+ Arguments:
+ nodes - List of node dicts
+ edges_up - List of upstream edge dicts
+ edges_down - List of downstream edge dicts
+ object_name - Object name pattern(s) analysed (may be CSV)
+
+ Returns:
+ Dictionary with summary_text, statistics, upstream_objects, downstream_objects
+ """
+ stats = _create_summary_stats(nodes, edges_up, edges_down)
+ upstream_nodes = [n for n in nodes if n.get("Direction") == "Upstream"]
+ downstream_nodes = [n for n in nodes if n.get("Direction") == "Downstream"]
+
+ summary_text = f"""
+DEPENDENCY ANALYSIS SUMMARY
+{"=" * 60}
+
+Object Pattern(s): {object_name}
+
+OVERVIEW
+ Total Nodes: {stats["total_nodes"]}
+ Total Edges: {stats["total_edges"]}
+
+UPSTREAM (What These Objects Depend On)
+ Dependencies Found: {stats["upstream_nodes"]}
+ Edges: {stats["upstream_edges"]}
+ Max Depth Reached: {stats["max_depth_upstream"]}
+
+DOWNSTREAM (What Depends On These Objects)
+ Dependents Found: {stats["downstream_nodes"]}
+ Edges: {stats["downstream_edges"]}
+ Max Depth Reached: {stats["max_depth_downstream"]}
+"""
+
+ if stats["object_type_counts"]:
+ summary_text += "\nBY OBJECT TYPE\n"
+ for obj_type, count in sorted(stats["object_type_counts"].items(), key=lambda x: x[1], reverse=True):
+ summary_text += f" {obj_type:20s} {count:3d}\n"
+
+ return {
+ "summary_text": summary_text,
+ "statistics": stats,
+ "upstream_objects": [n["FQDependentObjectName"] for n in upstream_nodes],
+ "downstream_objects": [n["FQDependentObjectName"] for n in downstream_nodes],
+ }
+
+
+# ---------------------------------------------------------------------------
+# Public handler
+# ---------------------------------------------------------------------------
+
+
+def handle_graph_traceLineage(
+    conn: TeradataConnection,
+    object_name: str,
+    max_depth_up: int = 3,
+    max_depth_down: int = 3,
+    exclude_objects: str = "",
+    include_containers: str = "",
+    edge_repository: str = "",
+    return_format: str = "detailed",
+    tool_name: str | None = None,
+    *args,
+    **kwargs,
+):
+    """
+    Analyse object dependencies in Teradata. Supports wildcards (%) and CSV patterns.
+
+    Hybrid implementation — no stored procedure required. Python constructs
+    Teradata recursive CTEs that execute entirely server-side. Only the
+    reachable subgraph crosses the network — not the full edge table.
+
+    Examples: 'DB.Table' (single), '%WBC%.%' (wildcard), 'DB.T1,DB.T2' (CSV)
+
+    Finds upstream dependencies (what the object depends on) and downstream
+    dependents (what depends on the object). Returns nodes and edges
+    representing the dependency subgraph.
+
+    When multiple patterns are provided via CSV, one upstream CTE and one
+    downstream CTE is executed per pattern. Results are merged and
+    deduplicated by Python before assembly.
+
+    Use this for:
+    - Impact analysis: "What breaks if I change or drop this object?"
+    - Lineage tracing: "Where does this data come from?"
+    - Dependency discovery: "What does this object use?"
+    - Pre-deployment validation: checking impacts before making changes
+
+    Arguments:
+      object_name        - str: Object name pattern(s).
+                           Supports wildcards (%) and CSV format.
+                           STRING type — not an array.
+
+                           Single:   'DEV01_StGeo_STD_T.mortgage_account'
+                           Wildcard: '%WBC%.%'
+                           Multiple: '%WBC%.%,%StGeo%.%'
+
+      max_depth_up       - int: Maximum levels to traverse upstream (0-10).
+                           0 = no upstream analysis. Out-of-range values are clamped. Default: 3
+
+      max_depth_down     - int: Maximum levels to traverse downstream (0-10).
+                           0 = no downstream analysis. Out-of-range values are clamped. Default: 3
+
+      exclude_objects    - str: CSV LIKE patterns to exclude.
+                           Matches against DB.Object format.
+                           Example: 'PRD_%,%.temp_%'
+                           Default: '' (no exclusions)
+
+      include_containers - str: CSV of container LIKE patterns to include
+                           (whitelist). Empty = all containers.
+                           Default: '' (all containers)
+
+      edge_repository    - str: Edge repository view/table conforming to the
+                           Graph Edge Contract. Required parameter — no default.
+
+      return_format      - str: 'detailed' (default), 'summary', or 'edges_only'
+
+    Returns:
+      ResponseType: formatted response with dependency analysis results.
+
+      detailed response structure:
+      {
+        "nodes":           [...],   // Unique nodes (deduplicated)
+        "upstream_edges":  [...],   // One row per upstream edge
+        "downstream_edges":[...],   // One row per downstream edge
+        "summary":         {...}    // Aggregate statistics
+      }
+
+      Edge row fields:
+        DependentObjectDBName, DependentObjectName, FQDependentObjectName,
+        ReferencedObjectDBName, ReferencedObjectName, FQReferencedObjectName,
+        Src_Kind, Tgt_Kind, Depth, DependencyPath
+    """
+    logger.debug(
+        "Tool: handle_graph_traceLineage: Args: "
+        "object_name=%s, max_depth_up=%s, max_depth_down=%s, "
+        "exclude_objects=%s, include_containers=%s, "
+        "edge_repository=%s, return_format=%s",
+        object_name,
+        max_depth_up,
+        max_depth_down,
+        exclude_objects,
+        include_containers,
+        edge_repository,
+        return_format,
+    )
+
+    # -----------------------------------------------------------------------
+    # Parse pattern inputs
+    # -----------------------------------------------------------------------
+    seed_patterns = parse_csv_patterns(object_name)
+    excl_patterns = parse_csv_patterns(exclude_objects)
+    incl_containers = parse_csv_patterns(include_containers)
+
+    if not seed_patterns:
+        return create_response(
+            {"error": "object_name must not be empty"},
+            {
+                "tool_name": tool_name or "graph_traceLineage",
+                "object_name": object_name,
+                "status": "error",
+            },
+        )
+
+    if not edge_repository:
+        return create_response(
+            {"error": "edge_repository is required. Call graph_edgeContractDDL to generate one."},
+            {
+                "tool_name": tool_name or "graph_traceLineage",
+                "object_name": object_name,
+                "status": "error",
+            },
+        )
+
+    try:
+        # -----------------------------------------------------------------------
+        # Validate and clamp depths (inside try: a non-numeric value -> error response)
+        # -----------------------------------------------------------------------
+        max_depth_up = max(0, min(10, int(max_depth_up)))
+        max_depth_down = max(0, min(10, int(max_depth_down)))
+
+        # -----------------------------------------------------------------------
+        # Build shared SQL fragments (same for every seed pattern)
+        # -----------------------------------------------------------------------
+        incl_fragment = _build_or_like(incl_containers, "e.Src_Container_Name", "e.Tgt_Container_Name")
+        excl_fragment = _build_excl_fragment(excl_patterns, "e.Src_Container_Name", "e.Src_Object_Name")
+
+        all_edges_up: list[dict] = []
+        all_edges_down: list[dict] = []
+
+        with conn.cursor() as cur:
+            for pattern in seed_patterns:
+                # ---------------------------------------------------------------
+                # Upstream traversal (skip if max_depth_up == 0)
+                # ---------------------------------------------------------------
+                if max_depth_up > 0:
+                    up_sql = _build_upstream_cte(
+                        seed_pattern=pattern,
+                        max_depth=max_depth_up,
+                        edge_table=edge_repository,
+                        incl_fragment=incl_fragment,
+                        excl_fragment=excl_fragment,
+                    )
+                    logger.debug("Tool: handle_graph_traceLineage: Upstream CTE for pattern '%s':\n%s", pattern, up_sql)
+                    cur.execute(up_sql)
+                    batch = rows_to_json(cur.description, cur.fetchall())
+                    all_edges_up.extend(batch)
+                    logger.debug(
+                        "Tool: handle_graph_traceLineage: Pattern '%s' upstream: %d edges", pattern, len(batch)
+                    )
+
+                # ---------------------------------------------------------------
+                # Downstream traversal (skip if max_depth_down == 0)
+                # ---------------------------------------------------------------
+                if max_depth_down > 0:
+                    down_sql = _build_downstream_cte(
+                        seed_pattern=pattern,
+                        max_depth=max_depth_down,
+                        edge_table=edge_repository,
+                        incl_fragment=incl_fragment,
+                        excl_fragment=excl_fragment,
+                    )
+                    logger.debug(
+                        "Tool: handle_graph_traceLineage: Downstream CTE for pattern '%s':\n%s", pattern, down_sql
+                    )
+                    cur.execute(down_sql)
+                    batch = rows_to_json(cur.description, cur.fetchall())
+                    all_edges_down.extend(batch)
+                    logger.debug(
+                        "Tool: handle_graph_traceLineage: Pattern '%s' downstream: %d edges", pattern, len(batch)
+                    )
+
+        # -----------------------------------------------------------------------
+        # Deduplicate edges by (FQDependentObjectName, FQReferencedObjectName)
+        # -----------------------------------------------------------------------
+        def _dedup(edges: list[dict]) -> list[dict]:
+            """Remove duplicate edges, keeping the first occurrence."""
+            seen: set[tuple] = set()
+            out: list[dict] = []
+            for e in edges:
+                key = (
+                    e.get("FQDependentObjectName"),
+                    e.get("FQReferencedObjectName"),
+                )
+                if key not in seen:
+                    seen.add(key)
+                    out.append(e)
+            return out
+
+        edges_up = _dedup(all_edges_up)
+        edges_down = _dedup(all_edges_down)
+
+        # -----------------------------------------------------------------------
+        # Derive nodes and assemble response
+        # -----------------------------------------------------------------------
+        nodes_data = _derive_nodes_from_edges(edges_up, edges_down)
+
+        if return_format == "summary":
+            formatted_data = _format_summary(nodes_data, edges_up, edges_down, object_name)
+        elif return_format == "edges_only":
+            formatted_data = {
+                "upstream_edges": edges_up,
+                "downstream_edges": edges_down,
+            }
+        else:  # detailed (default)
+            formatted_data = {
+                "nodes": nodes_data,
+                "upstream_edges": edges_up,
+                "downstream_edges": edges_down,
+                "summary": _create_summary_stats(nodes_data, edges_up, edges_down),
+            }
+
+        metadata = {
+            "tool_name": tool_name or "graph_traceLineage",
+            "object_name": object_name,
+            "max_depth_up": max_depth_up,
+            "max_depth_down": max_depth_down,
+            "edge_repository": edge_repository,
+            "return_format": return_format,
+            "counts": {
+                "nodes": len(nodes_data),
+                "upstream_edges": len(edges_up),
+                "downstream_edges": len(edges_down),
+            },
+            "status": "success",
+            "message": (
+                f"Dependency analysis complete: "
+                f"{len(nodes_data)} node(s), "
+                f"{len(edges_up)} upstream edge(s), "
+                f"{len(edges_down)} downstream edge(s)."
+            ),
+        }
+
+        logger.debug("Tool: handle_graph_traceLineage: metadata: %s", metadata)
+        return create_response(formatted_data, metadata)
+
+    except Exception as e:
+        logger.error("Tool: handle_graph_traceLineage: Error: %s", e, exc_info=True)
+        return create_response(
+            {"error": str(e)},
+            {
+                "tool_name": tool_name or "graph_traceLineage",
+                "object_name": object_name,
+                "status": "error",
+            },
+        )
+
+
+# ---------------------------------------------------------------------------
+# Tool registration descriptor
+# ---------------------------------------------------------------------------
+GRAPH_TRACE_LINEAGE_TOOL = {  # registration record: name, handler, description, parameter schema
+    "name": "graph_traceLineage",
+    "handler": handle_graph_traceLineage,
+    "description": (
+        "Analyse object dependencies in Teradata — finds upstream "
+        "dependencies (what the object depends on) and downstream "
+        "dependents (what depends on the object). "
+        "Hybrid implementation: Python constructs Teradata recursive CTEs "
+        "that execute entirely server-side, so only the reachable subgraph "
+        "crosses the network. No stored procedure required. "
+        "Supports wildcards (%) and CSV patterns for object_name. "
+        "Use for impact analysis, lineage tracing, and pre-deployment validation. "
+        "Do NOT use for migration wave sequencing — use graph_bfsLevels for that. "
+        "Requires an edge repository conforming to the Graph Edge Contract. "
+        "If you don't have one yet, call graph_edgeContractDDL first to generate the CREATE TABLE or CREATE VIEW DDL."
+    ),
+    "parameters": {
+        "object_name": {
+            "type": "string",
+            "description": (
+                "Object name pattern(s). "
+                "Supports wildcards (%) and CSV. Single: 'DB.Table'. "
+                "Wildcard: '%WBC%.%'. Multiple: '%WBC%.%,%StGeo%.%'."
+            ),
+            "required": True,
+        },
+        "max_depth_up": {
+            "type": "integer",
+            "description": "Maximum upstream levels to traverse (0-10). Default: 3.",
+            "default": 3,
+        },
+        "max_depth_down": {
+            "type": "integer",
+            "description": "Maximum downstream levels to traverse (0-10). Default: 3.",
+            "default": 3,
+        },
+        "exclude_objects": {
+            "type": "string",
+            "description": "CSV of FQ object name LIKE patterns to exclude. Example: 'PRD_%,%.temp_%'. Default: ''.",
+            "default": "",
+        },
+        "include_containers": {
+            "type": "string",
+            "description": "CSV of container LIKE patterns to include (whitelist). Default: '' (all containers).",
+            "default": "",
+        },
+        "edge_repository": {
+            "type": "string",
+            "description": (
+                "Edge repository table or view conforming to the "
+                "Graph Edge Contract. Call graph_edgeContractDDL to generate one "
+                "if needed. Required parameter — no default."
+            ),
+            "required": True,
+        },
+        "return_format": {
+            "type": "string",
+            "description": "Output format: 'detailed' (default), 'summary', or 'edges_only'.",
+            "default": "detailed",
+        },
+    },
+}
diff --git a/src/teradata_mcp_server/tools/module_loader.py b/src/teradata_mcp_server/tools/module_loader.py
index 9ecf5a4..9314480 100644
--- a/src/teradata_mcp_server/tools/module_loader.py
+++ b/src/teradata_mcp_server/tools/module_loader.py
@@ -24,6 +24,7 @@ class ModuleLoader:
"chat": "teradata_mcp_server.tools.chat",
"dba": "teradata_mcp_server.tools.dba",
"fs": "teradata_mcp_server.tools.fs",
+ "graph": "teradata_mcp_server.tools.graph",
"qlty": "teradata_mcp_server.tools.qlty",
"rag": "teradata_mcp_server.tools.rag",
"sql_opt": "teradata_mcp_server.tools.sql_opt",
diff --git a/src/teradata_mcp_server/tools/utils/__init__.py b/src/teradata_mcp_server/tools/utils/__init__.py
index 520a2de..e8edb29 100644
--- a/src/teradata_mcp_server/tools/utils/__init__.py
+++ b/src/teradata_mcp_server/tools/utils/__init__.py
@@ -21,8 +21,21 @@
# -------------------- Serialization & response helpers -------------------- #
def serialize_teradata_types(obj: Any) -> Any:
- """Convert Teradata-specific types to JSON serializable formats."""
- if isinstance(obj, date | datetime):
+ """Convert Teradata-specific types to JSON-serialisable formats.
+
+ Handles None explicitly so that database NULL values are preserved
+ as Python None (→ JSON null) rather than the string ``"None"``.
+
+ Args:
+ obj: The value to convert.
+
+ Returns:
+ A JSON-native type (str, int, float, bool, None) or an
+ ISO-formatted date string.
+ """
+ if obj is None:
+ return None
+ if isinstance(obj, (date, datetime)):
return obj.isoformat()
if isinstance(obj, Decimal):
return float(obj)
@@ -40,17 +53,83 @@ def rows_to_json(cursor_description: Any, rows: list[Any]) -> list[dict[str, Any
return out
-def create_response(data: Any, metadata: dict[str, Any] | None = None, error: dict[str, Any] | None = None) -> str:
- """Create a standardized JSON response structure."""
+def _make_serialisable(obj: Any) -> Any:
+    """Recursively convert an object tree into JSON-native Python types.
+
+    Deep-conversion counterpart of :func:`serialize_teradata_types`.
+    Nested dicts and lists produced by tool handlers come back holding
+    only values that ``json.dumps`` accepts without a custom *default*
+    hook, and ``None`` survives as ``None`` (JSON ``null``) instead of
+    the string ``"None"``.  Dict keys ``json.dumps`` would reject (dates,
+    tuples, ...) become the string form of their converted value.
+
+    Args:
+        obj: Any Python object (scalar, dict, list, tuple, etc.).
+
+    Returns:
+        A sanitised copy whose leaves are ``str | int | float | bool | None``.
+    """
+    if obj is None or isinstance(obj, (str, int, float, bool)):
+        return obj
+    if isinstance(obj, (date, datetime)):
+        return obj.isoformat()
+    if isinstance(obj, Decimal):
+        return float(obj)
+    if isinstance(obj, dict):
+        clean: dict[Any, Any] = {}
+        for key, value in obj.items():
+            if not (key is None or isinstance(key, (str, int, float, bool))):
+                key = str(_make_serialisable(key))  # e.g. date or tuple key
+            clean[key] = _make_serialisable(value)
+        return clean
+    if isinstance(obj, (list, tuple)):
+        return [_make_serialisable(item) for item in obj]
+    return str(obj)  # Fallback: cast to string (e.g. bytes, custom objects)
+
+
+def create_response(
+    data: Any,
+    metadata: dict[str, Any] | None = None,
+    error: dict[str, Any] | None = None,
+) -> dict:
+    """Create a standardised MCP response structure.
+
+    .. versionchanged:: 1.1.0
+        Returns a **dict** instead of a JSON string.  The MCP
+        framework requires ``structured_content`` to be a ``dict``
+        (or ``None``); returning a JSON string caused the server to
+        wrap it in a ``[{"type": "text", ...}]`` list which the
+        framework rejected.
+
+    All nested values (``data``, ``metadata`` and ``error``) are
+    recursively sanitised via :func:`_make_serialisable` so that
+    ``None`` / NULL values are preserved as ``None`` (JSON ``null``)
+    and Teradata-specific types (``Decimal``, ``datetime``, etc.)
+    are converted to JSON-native equivalents.
+
+    Args:
+        data:     Payload — typically a list of row-dicts.
+        metadata: Optional dict of tool metadata (tool_name, sql, etc.).
+        error:    Optional error dict; if supplied the response status
+                  is set to ``"error"`` and ``data`` is not included.
+
+    Returns:
+        dict: A JSON-serialisable dict ready to be used as
+        MCP ``structured_content``.
+    """
     if error:
-        resp = {"status": "error", "message": error}
+        resp: dict[str, Any] = {"status": "error", "message": _make_serialisable(error)}
         if metadata:
-            resp["metadata"] = metadata
-        return json.dumps(resp, default=serialize_teradata_types)
-    resp = {"status": "success", "results": data}
+            resp["metadata"] = _make_serialisable(metadata)
+        return resp
+
+    resp = {
+        "status": "success",
+        "results": _make_serialisable(data),
+    }
     if metadata:
-        resp["metadata"] = metadata
-    return json.dumps(resp, default=serialize_teradata_types)
+        resp["metadata"] = _make_serialisable(metadata)
+    return resp
# ------------------------------ Auth helpers ------------------------------ #