feat: customize intent in workflows and update tests

wangauone · wangauone · commit adcbe110e2b2 · 2026-01-25T16:39:58.000-08:00
- Update prompts to explicitly handle user-provided intent and prevent inference
- Refactor facet generator tests for multiple items and remove duplicates
- Fix function naming in question generator tests
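- Update main logic and models to support intent customization
- Rename generator entry points: generate_facets_from_items -> generate_facets, generate_templates_from_items -> generate_templates, generate_sql_pairs_from_schema -> generate_sql_pairs
- BREAKING: facet inputs now require "intent" and "sql_snippet"; the "question" key and the legacy "facet" key are no longer read, and a missing key now returns a JSON error string instead of raising KeyError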
diff --git a/mcp/facet/facet_generator.py b/mcp/facet/facet_generator.py
@@ -3,7 +3,7 @@
 from model import context
 
 
-async def generate_facets_from_items(
+async def generate_facets(
     facet_inputs_json: str, sql_dialect: str = "postgresql"
 ) -> str:
     """
@@ -26,25 +26,20 @@ async def generate_facets_from_items(
     final_facets = []
 
     for item in item_list:
-        question = item["question"]
-        # Support both 'sql_snippet' (preferred) and 'facet' (legacy) keys
-        facet_text = item.get("sql_snippet", item.get("facet"))
-        if not facet_text:
-            # Skip malformed items or raise error? For now, we might want to be robust
-            # But if both are missing, we might have an issue.
-            # Let's assume validation happened or we just let it fail later if None.
-            # Actually, to be safe and avoid KeyError if strict logic elsewhere:
-            raise KeyError("Each item must have a 'sql_snippet' or 'facet' key.")
 
-        intent = item.get(
-            "intent", question
-        )  # Use provided intent or fallback to question
+        sql_snippet = item.get("sql_snippet")
+        if not sql_snippet:
+            return '{"error": "Each item must have a \'sql_snippet\' key."}'
 
-        # 1. Extract value phrases from the question
-        phrases = await parameterizer.extract_value_phrases(nl_query=question)
+        intent = item.get("intent")
+        if not intent:
+             return '{"error": "Each item must have an \'intent\' key."}'
+
+        # 1. Extract value phrases from the intent (used as nl_query)
+        phrases = await parameterizer.extract_value_phrases(nl_query=intent)
 
         # 2. Generate the manifest
-        manifest = question
+        manifest = intent
         # Sort keys by length descending to replace longer phrases first
         sorted_phrases = sorted(phrases.keys(), key=len, reverse=True)
         for phrase in sorted_phrases:
@@ -54,12 +49,12 @@ async def generate_facets_from_items(
 
         # 3. Parameterize the SQL and Intent
         parameterized_result = parameterizer.parameterize_sql_and_intent(
-            phrases, facet_text, intent, db_dialect=db_dialect
+            phrases, sql_snippet, intent, db_dialect=db_dialect
         )
 
         # 4. Assemble the final facet object
         facet = context.Facet(
-            sql_snippet=facet_text,
+            sql_snippet=sql_snippet,
             intent=intent,
             manifest=manifest,
             parameterized=context.ParameterizedFacet(
diff --git a/mcp/main.py b/mcp/main.py
@@ -35,7 +35,7 @@ async def generate_sql_pairs(
         has a "question" and a "sql" key.
         Example: '[{"question": "...", "sql": "..."}]'
     """
-    return await question_generator.generate_sql_pairs_from_schema(
+    return await question_generator.generate_sql_pairs(
         db_schema, context, table_names, sql_dialect
     )
 
@@ -45,20 +45,20 @@ async def generate_templates(
     template_inputs_json: str, sql_dialect: str = "postgresql"
 ) -> str:
     """
-    Generates final templates from a list of user-approved question, SQL statement, and optional intent.
+    Generates final templates from a list of user-approved template question, template SQL statement, and optional template intent.
 
     Args:
         template_inputs_json: A JSON string representing a list of dictionaries (template inputs),
                              where each dictionary has "question", "sql", and optional "intent" keys.
                              Example (with intent): '[{"question": "How many users?", "sql": "SELECT count(*) FROM users", "intent": "Count total users"}]'
                              Example (default intent): '[{"question": "List all items", "sql": "SELECT * FROM items"}]'
         sql_dialect: The SQL dialect to use for parameterization. Accepted
-                   values are 'postgresql', 'mysql', or 'googlesql'.
+                   values are 'postgresql' (default), 'mysql', or 'googlesql'.
 
     Returns:
         A JSON string representing a ContextSet object.
     """
-    return await template_generator.generate_templates_from_items(
+    return await template_generator.generate_templates(
         template_inputs_json, sql_dialect
     )
 
@@ -68,20 +68,19 @@ async def generate_facets(
     facet_inputs_json: str, sql_dialect: str = "postgresql"
 ) -> str:
     """
-    Generates final facets from a list of user-approved question, SQL snippet, and optional intent.
+    Generates final facets from a list of user-approved facet intent and facet SQL snippet.
 
     Args:
         facet_inputs_json: A JSON string representing a list of dictionaries (facet inputs),
-                             where each dictionary has "question", "sql_snippet", and optional "intent".
-                             Example (with intent): '[{"question": "expensive items", "sql_snippet": "price > 1000", "intent": "Filter by high price"}]'
-                             Example (default intent): '[{"question": "active users", "sql_snippet": "status = 'active'"}]'
+                             where each dictionary has "intent" and "sql_snippet".
+                             Example: '[{"intent": "high price", "sql_snippet": "price > 1000"}]'
         sql_dialect: The SQL dialect to use for parameterization. Accepted
-                   values are 'postgresql', 'mysql', or 'googlesql'.
+                   values are 'postgresql' (default), 'mysql', or 'googlesql'.
 
     Returns:
         A JSON string representing a ContextSet object.
     """
-    return await facet_generator.generate_facets_from_items(
+    return await facet_generator.generate_facets(
         facet_inputs_json, sql_dialect
     )
 
@@ -211,19 +210,19 @@ def generate_upload_url(
 
 @mcp.prompt
 def generate_bulk_templates() -> str:
-    """Initiates a guided workflow to generate Question/SQL pair templates."""
+    """Initiates a guided workflow to automatically generate templates based on the database schema."""
     return prompts.GENERATE_BULK_TEMPLATES_PROMPT
 
 
 @mcp.prompt
 def generate_targeted_templates() -> str:
-    """Initiates a guided workflow to generate specific Question/SQL pair templates."""
+    """Initiates a guided workflow to generate specific templates based on the user's input."""
     return prompts.GENERATE_TARGETED_TEMPLATES_PROMPT
 
 
 @mcp.prompt
 def generate_targeted_facets() -> str:
-    """Initiates a guided workflow to generate specific Phrase/SQL facet pair templates."""
+    """Initiates a guided workflow to generate specific facets based on the user's input."""
     return prompts.GENERATE_TARGETED_FACETS_PROMPT
 
 
diff --git a/mcp/model/context.py b/mcp/model/context.py
@@ -33,6 +33,7 @@ class ParameterizedFacet(BaseModel):
     parameterized_sql_snippet: str = Field(
         ...,
         description="The SQL facet with placeholders (eg., ).",
+        # "fragment" is deprecated, keep alias for backward compatibility
         validation_alias=AliasChoices(
             "parameterized_sql_snippet", "parameterized_fragment"
         ),
@@ -48,7 +49,10 @@ class Facet(BaseModel):
     sql_snippet: str = Field(
         ...,
         description="The corresponding, complete SQL facet.",
-        validation_alias=AliasChoices("sql_snippet", "fragment"),
+        # "fragment" is deprecated, keep alias for backward compatibility
+        validation_alias=AliasChoices(
+            "sql_snippet", "fragment"
+        ),  
     )
     intent: str = Field(..., description="The user's specific intent.")
     manifest: str = Field(
@@ -66,5 +70,8 @@ class ContextSet(BaseModel):
     facets: List[Facet] | None = Field(
         None,
         description="A list of SQL facets.",
-        validation_alias=AliasChoices("facets", "fragments"),
+        # "fragments" is deprecated, keep alias for backward compatibility
+        validation_alias=AliasChoices(
+            "facets", "fragments"
+        ),  
     )
diff --git a/mcp/prompts/bulk_templates.py b/mcp/prompts/bulk_templates.py
@@ -2,7 +2,7 @@
 
 GENERATE_BULK_TEMPLATES_PROMPT = textwrap.dedent(
     """
-    **Workflow for Generating Question/SQL Pair Templates**
+    **Workflow for Automatically Generating Templates**
 
     1.  **Discover and Select Database:**
         - Find all connected databases from the MCP Toolbox and `tools.yaml`.
diff --git a/mcp/prompts/targeted_facets.py b/mcp/prompts/targeted_facets.py
@@ -2,24 +2,23 @@
 
 GENERATE_TARGETED_FACETS_PROMPT = textwrap.dedent(
     """
-    **Workflow for Generating Targeted Phrase/SQL Facet Pair Templates**
+    **Workflow for Generating Targeted Facets**
 
     1.  **User Input Loop:**
-        - Ask the user to provide a natural language phrase and its corresponding SQL facet.
-        - **Optionally**, ask if they want to provide a specific "intent" for this pair. If not provided, the phrase will be used as the intent.
-        - After capturing the pair, ask the user if they would like to add another one.
+        - Ask the user to provide an intent and its corresponding SQL snippet.
+        - **Important:** Do not infer the intent or SQL snippet. Wait for the user to provide them.
+        - After capturing the intent and SQL snippet pair, ask the user if they would like to add another one.
         - Continue this loop until the user indicates they have no more pairs to add.
 
     2.  **Review and Confirmation:**
-        - Present the complete list of user-provided Phrase/SQL facet pairs for confirmation.
-          - **Use the following format for each pair:**
-            **Pair [Number]**
-            **Phrase:** [The natural language phrase]
-            **Facet:**
+        - Present the complete list of user-provided Intent/SQL snippet pairs for confirmation.
+          - **Use the following format for each facet:**
+            **Facet [Number]**
+            **Intent:** [The intent]
+            **SQL snippet:**
             ```sql
-            [The SQL facet, properly formatted]
+            [The SQL snippet, properly formatted]
             ```
-            **Intent:** [The intent, if provided. Otherwise "Same as Phrase"]
         - Ask if any modifications are needed. If so, work with the user to refine the pairs.
 
     3.  **Final Facet Generation:**
diff --git a/mcp/prompts/targeted_templates.py b/mcp/prompts/targeted_templates.py
@@ -2,18 +2,19 @@
 
 GENERATE_TARGETED_TEMPLATES_PROMPT = textwrap.dedent(
     """
-    **Workflow for Generating Targeted Question/SQL Pair Templates**
+    **Workflow for Generating Targeted Templates**
 
     1.  **User Input Loop:**
         - Ask the user to provide a natural language question and its corresponding SQL query.
         - **Optionally**, ask if they want to provide a specific "intent" for this pair. If not provided, the question will be used as the intent.
-        - After capturing the pair, ask the user if they would like to add another one.
-        - Continue this loop until the user indicates they have no more pairs to add.
+        - **Important:** Do not infer the question or SQL query. Wait for the user to provide them.
+        - After capturing the inputs for a template, ask the user if they would like to add another one.
+        - Continue this loop until the user indicates they have no more to add.
 
     2.  **Review and Confirmation:**
         - Present the complete list of user-provided Question/SQL pairs for confirmation.
           - **Use the following format for each pair:**
-            **Pair [Number]**
+            **Template [Number]**
             **Question:** [The natural language question]
             **SQL:**
             ```sql
diff --git a/mcp/template/question_generator.py b/mcp/template/question_generator.py
@@ -22,7 +22,7 @@ class QuestionSQLPairs(BaseModel):
     pairs: List[QuestionSQLPair]
 
 
-async def generate_sql_pairs_from_schema(
+async def generate_sql_pairs(
     db_schema: str,
     context: str | None = None,
     table_names: List[str] | None = None,
diff --git a/mcp/template/template_generator.py b/mcp/template/template_generator.py
@@ -3,7 +3,7 @@
 from model import context
 
 
-async def generate_templates_from_items(
+async def generate_templates(
     template_inputs_json: str, sql_dialect: str = "postgresql"
 ) -> str:
     """
diff --git a/mcp/tests/facet/facet_generator_test.py b/mcp/tests/facet/facet_generator_test.py
diff --git a/mcp/tests/template/question_generator_test.py b/mcp/tests/template/question_generator_test.py
diff --git a/mcp/tests/template/template_generator_test.py b/mcp/tests/template/template_generator_test.py