fix(max-ai): handle SQL generation retry exhaustion gracefully (#64818)

posthog[bot] · web-flow · commit 1b0e3336f610 · 2026-06-21T00:41:22.000Z
Co-authored-by: posthog[bot] <206114724+posthog[bot]@users.noreply.github.com>
diff --git a/ee/hogai/chat_agent/query_executor/nodes.py b/ee/hogai/chat_agent/query_executor/nodes.py
@@ -59,6 +59,9 @@ async def _extract_artifact(self, state: AssistantState) -> ArtifactMessage | No
         if isinstance(last_message, FailureMessage):
             return None  # Exit early - something failed earlier
 
+        if isinstance(last_message, AssistantToolCallMessage):
+            return None  # Exit early - a generator already produced a terminal tool response (e.g. graceful failure)
+
         if not isinstance(last_message, ArtifactRefMessage):
             raise ValueError(f"Expected an ArtifactRefMessage, found {type(last_message)}")
 
diff --git a/ee/hogai/chat_agent/sql/mixins.py b/ee/hogai/chat_agent/sql/mixins.py
@@ -170,10 +170,16 @@ def _validate_hogql_query_sync(self, query: str) -> AssistantHogQLQuery:
             err_msg = str(err)
             # Both the antlr-based cpp parser and the hand-rolled rust-py parser produce
             # terse low-level error wording on syntax failures ("no viable alternative…",
-            # "trailing tokens after expression…", "unexpected token in expression…").
-            # Replace any of them with a single human/LLM-friendly message.
+            # "trailing tokens after expression…", "unexpected token in expression…",
+            # "mismatched input … expecting …"). Replace any of them with a single
+            # human/LLM-friendly message.
             if err_msg.startswith(
-                ("no viable alternative", "trailing tokens after expression", "unexpected token in expression")
+                (
+                    "no viable alternative",
+                    "trailing tokens after expression",
+                    "unexpected token in expression",
+                    "mismatched input",
+                )
             ):
                 err_msg = "HogQL parsing error: this query isn't valid HogQL."
             raise PydanticOutputParserException(llm_output=cleaned_query, validation_message=err_msg)
diff --git a/ee/hogai/chat_agent/sql/nodes.py b/ee/hogai/chat_agent/sql/nodes.py
@@ -1,13 +1,16 @@
+from uuid import uuid4
+
 from langchain_core.runnables import RunnableConfig
 
-from posthog.schema import DataVisualizationNode
+from posthog.schema import AssistantToolCallMessage, DataVisualizationNode, FailureMessage
 
 from posthog.hogql.context import HogQLContext
 
 from ee.hogai.utils.types import AssistantState, PartialAssistantState
 
-from ..schema_generator.nodes import SchemaGeneratorNode, SchemaGeneratorToolsNode
+from ..schema_generator.nodes import SchemaGenerationException, SchemaGeneratorNode, SchemaGeneratorToolsNode
 from .mixins import HogQLGeneratorMixin, SQLSchemaGeneratorOutput
+from .prompts import SQL_GENERATION_FAILURE_MESSAGE
 from .toolkit import SQL_SCHEMA
 
 
@@ -20,7 +23,35 @@ class SQLGeneratorNode(HogQLGeneratorMixin, SchemaGeneratorNode[DataVisualizatio
 
     async def arun(self, state: AssistantState, config: RunnableConfig) -> PartialAssistantState:
         prompt = await self._construct_system_prompt()
-        return await super()._run_with_prompt(state, prompt, config=config)
+        try:
+            return await super()._run_with_prompt(state, prompt, config=config)
+        except SchemaGenerationException as e:
+            # The LLM exhausted its retries on invalid HogQL. Surface this as a graceful tool
+            # response so the calling agent can recover, instead of letting it bubble up to the
+            # runner's generic handler and be captured as an unhandled application error.
+            return self._handle_generation_failure(state, e)
+
+    def _handle_generation_failure(
+        self, state: AssistantState, error: SchemaGenerationException
+    ) -> PartialAssistantState:
+        tool_call_id = state.root_tool_call_id
+        content = SQL_GENERATION_FAILURE_MESSAGE.format(error_message=error.validation_message)
+        # Respond to the calling agent when there's a tool call to answer; otherwise emit a
+        # FailureMessage so the run still terminates cleanly via the query executor.
+        message = (
+            AssistantToolCallMessage(content=content, id=str(uuid4()), tool_call_id=tool_call_id)
+            if tool_call_id
+            else FailureMessage(content=content, id=str(uuid4()))
+        )
+        return PartialAssistantState(
+            messages=[message],
+            intermediate_steps=None,
+            plan=None,
+            rag_context=None,
+            root_tool_call_id=None,
+            root_tool_insight_plan=None,
+            root_tool_insight_type=None,
+        )
 
 
 class SQLGeneratorToolsNode(SchemaGeneratorToolsNode):
diff --git a/ee/hogai/chat_agent/sql/prompts.py b/ee/hogai/chat_agent/sql/prompts.py
@@ -1,3 +1,7 @@
+SQL_GENERATION_FAILURE_MESSAGE = (
+    "I wasn't able to generate a valid SQL query for this request after several attempts. Error: {error_message}"
+)
+
 HOGQL_GENERATOR_SYSTEM_PROMPT = """
 You are an expert in writing HogQL. HogQL is PostHog's variant of SQL that supports most of ClickHouse SQL. We're going to use terms "HogQL" and "SQL" interchangeably.
 You write HogQL based on a prompt. You don't help with other knowledge. You are provided with the current HogQL query that the user is editing. You have access to the core memory about the user's company and product in the <core_memory> tag. Use this memory in your responses.
diff --git a/ee/hogai/chat_agent/sql/test/test_nodes.py b/ee/hogai/chat_agent/sql/test/test_nodes.py
@@ -2,18 +2,23 @@
 from unittest.mock import patch
 
 from langchain_core.runnables import RunnableConfig, RunnableLambda
+from parameterized import parameterized
 
-from posthog.schema import ArtifactContentType, ArtifactSource, HumanMessage
+from posthog.schema import ArtifactContentType, ArtifactSource, AssistantToolCallMessage, FailureMessage, HumanMessage
 
 from products.posthog_ai.backend.models.assistant import Conversation
 
+from ee.hogai.chat_agent.schema_generator.nodes import SchemaGenerationException
 from ee.hogai.chat_agent.sql.nodes import SQLGeneratorNode
 from ee.hogai.utils.types import AssistantState
 from ee.hogai.utils.types.base import ArtifactRefMessage
 
 
 class TestSQLGeneratorNode(NonAtomicBaseTest):
     maxDiff = None
+    # NonAtomicBaseTest truncates all tables (RESTART IDENTITY) after each test, so class-level
+    # test data created once in setUpClass is gone by the second test. Recreate it per test.
+    CLASS_DATA_LEVEL_SETUP = False
 
     def setUp(self):
         super().setUp()
@@ -51,3 +56,43 @@ async def test_node_runs(self):
             self.assertIsNone(new_state.intermediate_steps)
             self.assertIsNone(new_state.plan)
             self.assertIsNone(new_state.rag_context)
+
+    @parameterized.expand(
+        [
+            ("with_tool_call", "tool_123", AssistantToolCallMessage),
+            ("without_tool_call", None, FailureMessage),
+        ]
+    )
+    async def test_node_handles_retry_exhaustion_gracefully(self, _name, root_tool_call_id, expected_message_type):
+        node = SQLGeneratorNode(self.team, self.user)
+        config = RunnableConfig(configurable={"thread_id": str(self.conversation.id)})
+
+        async def _raise(*args, **kwargs):
+            raise SchemaGenerationException(
+                "WITH date_end AS toDate(now()) SELECT 1",
+                "HogQL parsing error: this query isn't valid HogQL.",
+            )
+
+        with patch("ee.hogai.chat_agent.schema_generator.nodes.SchemaGeneratorNode._run_with_prompt", new=_raise):
+            new_state = await node(
+                AssistantState(
+                    messages=[HumanMessage(content="Text")],
+                    plan="Plan",
+                    root_tool_call_id=root_tool_call_id,
+                    root_tool_insight_plan="question",
+                ),
+                config,
+            )
+
+        assert new_state is not None
+        self.assertEqual(len(new_state.messages), 1)
+        msg = new_state.messages[0]
+        self.assertIsInstance(msg, expected_message_type)
+        assert isinstance(msg, AssistantToolCallMessage | FailureMessage)
+        assert msg.content is not None
+        self.assertIn("valid SQL query", msg.content)
+        if isinstance(msg, AssistantToolCallMessage):
+            self.assertEqual(msg.tool_call_id, root_tool_call_id)
+        # Node ends gracefully and clears the tool call so the run terminates
+        self.assertIsNone(new_state.root_tool_call_id)
+        self.assertIsNone(new_state.intermediate_steps)