KG-545 Fix requestLLMOnlyCallingTools ignoring tool calls after reasoning message (#1198)

mltheuser · Malte Heuser · web-flow · commit 2b84ddd78461 · 2025-12-11T20:25:49.000+01:00
## Motivation and Context

Related to [KG-545](https://youtrack.jetbrains.com/issue/KG-545).

Currently, `requestLLMOnlyCallingTools` relies on `executeSingle`, which returns the first message received from the LLM. When using models that output Chain of Thought or "Thinking" blocks (e.g., Nova, Claude) prior to calling a tool, the response sequence is often `[Message.Assistant(Thinking), Message.Tool.Call]`.

As a result:

1. The method returns the "Thinking" text message instead of the expected Tool Call.
2. The actual Tool Call is discarded and never executed or saved to history.

**Changes:**

* Introduced `requestLLMMultipleOnlyCallingTools()` in `AIAgentLLMSession` to allow retrieving the full list of messages while enforcing `ToolChoice.Required`.
* Updated `requestLLMOnlyCallingTools` to use this new method. It now persists **all** messages (preserving the reasoning context in the session history) but filters the return value to ensure the caller receives the `Message.Tool.Call`.

## Breaking Changes

None. This is a behavioral fix to ensure the method contract (returning a tool call) is honored when the LLM is "chatty" or provides reasoning. Note, however, that when the response contains no tool call at all, `requestLLMOnlyCallingTools` now throws an `IllegalStateException` instead of returning the first (non-tool) message as it did before.
---

#### Type of the changes

- [ ] New feature (non-breaking change which adds functionality)
- [x] Bug fix (non-breaking change which fixes an issue)
- [ ] Breaking change (fix or feature that would cause existing functionality to change)
- [ ] Documentation update
- [ ] Tests improvement
- [ ] Refactoring

#### Checklist

- [x] The pull request has a description of the proposed change
- [x] I read the [Contributing Guidelines](https://github.com/JetBrains/koog/blob/main/CONTRIBUTING.md) before opening the pull request
- [x] The pull request uses **`develop`** as the base branch
- [x] Tests for the changes have been added
- [x] All new and existing tests passed

##### Additional steps for pull requests adding a new feature

- [x] An issue describing the proposed change exists
- [x] The pull request includes a link to the issue
- [x] The change was discussed and approved in the issue
- [ ] Docs have been added / updated

---------

Co-authored-by: Malte Heuser <malte.heuser@ing.com>
diff --git a/agents/agents-core/src/commonMain/kotlin/ai/koog/agents/core/agent/session/AIAgentLLMSession.kt b/agents/agents-core/src/commonMain/kotlin/ai/koog/agents/core/agent/session/AIAgentLLMSession.kt
@@ -146,17 +146,43 @@ public sealed class AIAgentLLMSession(
     /**
      * Sends a request to the language model that enforces the usage of tools and retrieves the response.
      *
-     * This method updates the session's prompt configuration to mark tool usage as required before
-     * executing the request. Additionally, it ensures the session is active before proceeding.
+     * This method:
+     * 1. Validates that the session is active.
+     * 2. Updates the prompt configuration to mark tool usage as required (`ToolChoice.Required`).
+     * 3. Retrieves all generated messages (including potential Chain of Thought/Reasoning blocks).
+     * 4. Filters the result to return the first [Message.Tool.Call].
      *
-     * @return The response from the language model after executing the request with enforced tool usage.
+     * If no tool call is found (e.g., the model refused or asked a question), this method throws an [IllegalStateException].
+     *
+     * @return The tool call response from the language model.
      */
     public open suspend fun requestLLMOnlyCallingTools(): Message.Response {
+        validateSession()
+        // We use the multiple-response method to ensure we capture all context (e.g. thinking)
+        // even though we only return the specific tool call.
+        val responses = requestLLMMultipleOnlyCallingTools()
+        return responses.firstOrNull { it is Message.Tool.Call }
+            ?: error("requestLLMOnlyCallingTools expected at least one Tool.Call but received: ${responses.map { it::class.simpleName }}")
+    }
+
+    /**
+     * Sends a request to the language model that enforces the usage of tools and retrieves all responses.
+     *
+     * This is useful when the LLM returns multiple messages, such as a "thinking" block followed by tool calls,
+     * or multiple parallel tool calls.
+     *
+     * This method:
+     * 1. Validates that the session is active.
+     * 2. Updates the prompt configuration to mark tool usage as required (`ToolChoice.Required`).
+     *
+     * @return A list of responses from the language model.
+     */
+    public open suspend fun requestLLMMultipleOnlyCallingTools(): List<Message.Response> {
         validateSession()
         val promptWithOnlyCallingTools = prompt.withUpdatedParams {
             toolChoice = LLMParams.ToolChoice.Required
         }
-        return executeSingle(promptWithOnlyCallingTools, tools)
+        return executeMultiple(promptWithOnlyCallingTools, tools)
     }
 
     /**
diff --git a/agents/agents-core/src/commonMain/kotlin/ai/koog/agents/core/agent/session/AIAgentLLMWriteSession.kt b/agents/agents-core/src/commonMain/kotlin/ai/koog/agents/core/agent/session/AIAgentLLMWriteSession.kt
@@ -349,9 +349,7 @@ public class AIAgentLLMWriteSession internal constructor(
      */
     override suspend fun requestLLMMultipleWithoutTools(): List<Message.Response> {
         return super.requestLLMMultipleWithoutTools().also { responses ->
-            appendPrompt {
-                responses.forEach { message(it) }
-            }
+            appendPrompt { messages(responses) }
         }
     }
 
@@ -368,13 +366,17 @@ public class AIAgentLLMWriteSession internal constructor(
     }
 
     /**
-     * Requests a response from the Language Learning Model (LLM) while also processing
-     * the response by updating the current prompt with the received message.
+     * Requests a response from the Language Model (LLM) enforcing tool usage (`ToolChoice.Required`),
+     * validates the session, and processes all returned messages (e.g. thinking + tool call).
+     *
+     * Crucially, this method appends **all** received messages to the prompt history to preserve context.
      *
-     * @return The response received from the Language Learning Model (LLM).
+     * @return A list of responses received from the Language Model (LLM).
      */
-    override suspend fun requestLLMOnlyCallingTools(): Message.Response {
-        return super.requestLLMOnlyCallingTools().also { response -> appendPrompt { message(response) } }
+    override suspend fun requestLLMMultipleOnlyCallingTools(): List<Message.Response> {
+        return super.requestLLMMultipleOnlyCallingTools().also { responses ->
+            appendPrompt { messages(responses) }
+        }
     }
 
     /**
@@ -419,9 +421,7 @@ public class AIAgentLLMWriteSession internal constructor(
      */
     override suspend fun requestLLMMultiple(): List<Message.Response> {
         return super.requestLLMMultiple().also { responses ->
-            appendPrompt {
-                responses.forEach { message(it) }
-            }
+            appendPrompt { messages(responses) }
         }
     }
 
diff --git a/agents/agents-core/src/commonTest/kotlin/ai/koog/agents/core/agent/session/AIAgentLLMWriteSessionTest.kt b/agents/agents-core/src/commonTest/kotlin/ai/koog/agents/core/agent/session/AIAgentLLMWriteSessionTest.kt
@@ -369,4 +369,109 @@ class AIAgentLLMWriteSessionTest {
         val response = session.requestLLM()
         assertEquals("Changed params response", response.content)
     }
+
+    @Test
+    fun testRequestLLMMultipleOnlyCallingTools() = runTest {
+        val thinkingContent = "<thinking>I need to use a tool</thinking>"
+        val testTool = TestTool()
+
+        val mockExecutor = getMockExecutor(clock = testClock) {
+            // Simulate [Assistant, ToolCall] sequence
+            mockLLMMixedResponse(
+                toolCalls = listOf(testTool to TestTool.Args("test")),
+                responses = listOf(thinkingContent)
+            ) onCondition { true }
+        }
+
+        val session = createSession(mockExecutor, listOf(testTool))
+
+        val responses = session.requestLLMMultipleOnlyCallingTools()
+
+        assertEquals(2, responses.size)
+        assertEquals(thinkingContent, (responses[0] as Message.Assistant).content)
+        assertEquals("test-tool", (responses[1] as Message.Tool.Call).tool)
+
+        // Verify that BOTH messages were appended to the prompt history in correct order
+        val lastTwoMessages = session.prompt.messages.takeLast(2)
+        assertEquals(thinkingContent, (lastTwoMessages[0] as Message.Assistant).content)
+        assertEquals("test-tool", (lastTwoMessages[1] as Message.Tool.Call).tool)
+    }
+
+    @Test
+    fun testRequestLLMOnlyCallingToolsWithThinking() = runTest {
+        val thinkingContent = "<thinking>Checking file...</thinking>"
+        val testTool = TestTool()
+
+        val mockExecutor = getMockExecutor(clock = testClock) {
+            mockLLMMixedResponse(
+                toolCalls = listOf(testTool to TestTool.Args("test")),
+                responses = listOf(thinkingContent)
+            ) onCondition { true }
+        }
+
+        val session = createSession(mockExecutor, listOf(testTool))
+
+        val response = session.requestLLMOnlyCallingTools()
+
+        // It should strictly return the ToolCall (fixing the bug), skipping the thinking message
+        assertTrue(response is Message.Tool.Call, "Expected response to be a Tool Call, not the thinking message")
+        assertEquals("test-tool", response.tool)
+
+        // It should still persist the "Thinking" message in history in correct order
+        val lastTwoMessages = session.prompt.messages.takeLast(2)
+        assertEquals(thinkingContent, (lastTwoMessages[0] as Message.Assistant).content)
+        assertEquals("test-tool", (lastTwoMessages[1] as Message.Tool.Call).tool)
+    }
+
+    @Test
+    fun testRequestLLMOnlyCallingToolsNoToolCallThrowsException() = runTest {
+        val mockExecutor = getMockExecutor(clock = testClock) {
+            // Simulate model refusing to use tools and just responding with text
+            mockLLMAnswer("I cannot use tools for this request.").asDefaultResponse
+        }
+
+        val session = createSession(mockExecutor, listOf(TestTool()))
+
+        val exception = kotlin.runCatching {
+            session.requestLLMOnlyCallingTools()
+        }.exceptionOrNull()
+
+        assertNotNull(exception, "Expected an exception when no tool call is found")
+        assertTrue(
+            exception is IllegalStateException,
+            "Expected IllegalStateException but got ${exception::class.simpleName}"
+        )
+        assertTrue(
+            exception.message?.contains("expected at least one Tool.Call") == true,
+            "Exception message should indicate missing tool call"
+        )
+    }
+
+    @Test
+    fun testRequestLLMOnlyCallingToolsWithMultipleToolCalls() = runTest {
+        val testTool = TestTool()
+
+        val mockExecutor = getMockExecutor(clock = testClock) {
+            // Simulate model returning multiple tool calls (parallel tool calling)
+            mockLLMMixedResponse(
+                toolCalls = listOf(
+                    testTool to TestTool.Args("first"),
+                    testTool to TestTool.Args("second")
+                ),
+                responses = emptyList()
+            ) onCondition { true }
+        }
+
+        val session = createSession(mockExecutor, listOf(testTool))
+
+        val response = session.requestLLMOnlyCallingTools()
+
+        // Should return the first tool call
+        assertTrue(response is Message.Tool.Call, "Expected response to be a Tool Call")
+        assertEquals("test-tool", response.tool)
+
+        // Both tool calls should be in history
+        val lastTwoMessages = session.prompt.messages.takeLast(2)
+        assertTrue(lastTwoMessages.all { it is Message.Tool.Call })
+    }
 }

Original file line number	Diff line number	Diff line change
`@@ -349,9 +349,7 @@ public class AIAgentLLMWriteSession internal constructor(`
`349`	`349`	`*/`
`350`	`350`	`override suspend fun requestLLMMultipleWithoutTools(): List<Message.Response> {`
`351`	`351`	`return super.requestLLMMultipleWithoutTools().also { responses ->`
`352`		`- appendPrompt {`
`353`		`- responses.forEach { message(it) }`
`354`		`- }`
	`352`	`+ appendPrompt { messages(responses) }`
`355`	`353`	`}`
`356`	`354`	`}`
`357`	`355`
`@@ -368,13 +366,17 @@ public class AIAgentLLMWriteSession internal constructor(`
`368`	`366`	`}`
`369`	`367`
`370`	`368`	`/**`
`371`		`- * Requests a response from the Language Learning Model (LLM) while also processing`
`372`		`- * the response by updating the current prompt with the received message.`
	`369`	``+ * Requests a response from the Language Model (LLM) enforcing tool usage (`ToolChoice.Required`),``
	`370`	`+ * validates the session, and processes all returned messages (e.g. thinking + tool call).`
	`371`	`+ *`
	`372`	`+ * Crucially, this method appends **all** received messages to the prompt history to preserve context.`
`373`	`373`	`*`
`374`		`- * @return The response received from the Language Learning Model (LLM).`
	`374`	`+ * @return A list of responses received from the Language Model (LLM).`
`375`	`375`	`*/`
`376`		`- override suspend fun requestLLMOnlyCallingTools(): Message.Response {`
`377`		`- return super.requestLLMOnlyCallingTools().also { response -> appendPrompt { message(response) } }`
	`376`	`+ override suspend fun requestLLMMultipleOnlyCallingTools(): List<Message.Response> {`
	`377`	`+ return super.requestLLMMultipleOnlyCallingTools().also { responses ->`
	`378`	`+ appendPrompt { messages(responses) }`
	`379`	`+ }`
`378`	`380`	`}`
`379`	`381`
`380`	`382`	`/**`
`@@ -419,9 +421,7 @@ public class AIAgentLLMWriteSession internal constructor(`
`419`	`421`	`*/`
`420`	`422`	`override suspend fun requestLLMMultiple(): List<Message.Response> {`
`421`	`423`	`return super.requestLLMMultiple().also { responses ->`
`422`		`- appendPrompt {`
`423`		`- responses.forEach { message(it) }`
`424`		`- }`
	`424`	`+ appendPrompt { messages(responses) }`
`425`	`425`	`}`
`426`	`426`	`}`
`427`	`427`