Skip to content

Commit 521e64d

Browse files
authored
Test executeStreaming with tool calls (#1261)
<!-- Thank you for opening a pull request! Please add a brief description of the proposed change here. Also, please tick the appropriate points in the checklist below. --> ## Motivation and Context <!-- Why is this change needed? What problem does it solve? --> Add `integration_testExecuteStreamingWithTools` to test streaming+tools functionality. ## Breaking Changes <!-- Will users need to update their code or configurations? --> --- #### Type of the changes - [ ] New feature (non-breaking change which adds functionality) - [ ] Bug fix (non-breaking change which fixes an issue) - [ ] Breaking change (fix or feature that would cause existing functionality to change) - [ ] Documentation update - [x] Tests improvement - [ ] Refactoring #### Checklist - [x] The pull request has a description of the proposed change - [x] I read the [Contributing Guidelines](https://github.com/JetBrains/koog/blob/main/CONTRIBUTING.md) before opening the pull request - [x] The pull request uses **`develop`** as the base branch - [x] Tests for the changes have been added - [x] All new and existing tests passed ##### Additional steps for pull requests adding a new feature - [ ] An issue describing the proposed change exists - [ ] The pull request includes a link to the issue - [ ] The change was discussed and approved in the issue - [ ] Docs have been added / updated
1 parent 2b84ddd commit 521e64d

File tree

4 files changed

+86
-12
lines changed

4 files changed

+86
-12
lines changed

integration-tests/src/jvmTest/kotlin/ai/koog/integration/tests/executor/ExecutorIntegrationTestBase.kt

Lines changed: 69 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,13 @@ import ai.koog.integration.tests.utils.structuredOutput.getConfigNoFixingParserM
2323
import ai.koog.integration.tests.utils.structuredOutput.getConfigNoFixingParserNative
2424
import ai.koog.integration.tests.utils.structuredOutput.parseMarkdownStreamToCountries
2525
import ai.koog.integration.tests.utils.structuredOutput.weatherStructuredOutputPrompt
26+
import ai.koog.integration.tests.utils.tools.CalculatorOperation
2627
import ai.koog.integration.tests.utils.tools.CalculatorTool
2728
import ai.koog.integration.tests.utils.tools.LotteryTool
2829
import ai.koog.integration.tests.utils.tools.PickColorFromListTool
2930
import ai.koog.integration.tests.utils.tools.PickColorTool
3031
import ai.koog.integration.tests.utils.tools.PriceCalculatorTool
32+
import ai.koog.integration.tests.utils.tools.SimpleCalculatorTool
3133
import ai.koog.integration.tests.utils.tools.SimplePriceCalculatorTool
3234
import ai.koog.integration.tests.utils.tools.calculatorPrompt
3335
import ai.koog.integration.tests.utils.tools.calculatorPromptNotRequiredOptionalParams
@@ -181,9 +183,6 @@ abstract class ExecutorIntegrationTestBase {
181183

182184
open fun integration_testExecuteStreaming(model: LLModel) = runTest(timeout = 300.seconds) {
183185
Models.assumeAvailable(model.provider)
184-
if (model.id == OpenAIModels.Audio.GPT4oAudio.id || model.id == OpenAIModels.Audio.GPT4oMiniAudio.id) {
185-
assumeTrue(false, "https://github.com/JetBrains/koog/issues/231")
186-
}
187186

188187
val executor = getExecutor(model)
189188

@@ -196,13 +195,15 @@ abstract class ExecutorIntegrationTestBase {
196195
with(StringBuilder()) {
197196
val endMessages = mutableListOf<StreamFrame.End>()
198197
val toolMessages = mutableListOf<StreamFrame.ToolCall>()
199-
executor.executeStreaming(prompt, model).collect {
200-
when (it) {
201-
is StreamFrame.Append -> append(it.text)
202-
is StreamFrame.End -> endMessages.add(it)
203-
is StreamFrame.ToolCall -> toolMessages.add(it)
204-
}
205-
}
198+
199+
executor.executeStreamAndCollect(
200+
prompt = prompt,
201+
model = model,
202+
appendable = this,
203+
endMessages = endMessages,
204+
toolMessages = toolMessages
205+
)
206+
206207
length shouldNotBe (0)
207208
toolMessages.shouldBeEmpty()
208209
when (model.provider) {
@@ -221,6 +222,42 @@ abstract class ExecutorIntegrationTestBase {
221222
}
222223
}
223224

225+
open fun integration_testExecuteStreamingWithTools(model: LLModel) = runTest(timeout = 300.seconds) {
226+
Models.assumeAvailable(model.provider)
227+
assumeTrue(model.capabilities.contains(LLMCapability.Tools), "Model $model does not support tools")
228+
229+
val executor = getExecutor(model)
230+
231+
val prompt = Prompt.build("test-streaming", LLMParams(toolChoice = ToolChoice.Required)) {
232+
system("You are a helpful assistant.")
233+
user("Count three times five")
234+
}
235+
236+
withRetry(times = 3, testName = "integration_testExecuteStreamingWithTools[${model.id}]") {
237+
with(StringBuilder()) {
238+
val endMessages = mutableListOf<StreamFrame.End>()
239+
val toolMessages = mutableListOf<StreamFrame.ToolCall>()
240+
241+
executor.executeStreamAndCollect(
242+
prompt = prompt,
243+
model = model,
244+
tools = listOf(SimpleCalculatorTool.descriptor),
245+
appendable = this,
246+
endMessages = endMessages,
247+
toolMessages = toolMessages
248+
)
249+
250+
toolMessages.shouldNotBeEmpty()
251+
withClue("Expected calculator tool call but got: [$toolMessages]") {
252+
toolMessages.any {
253+
it.name == SimpleCalculatorTool.name &&
254+
it.content.contains(CalculatorOperation.MULTIPLY.name, ignoreCase = true)
255+
} shouldBe true
256+
}
257+
}
258+
}
259+
}
260+
224261
open fun integration_testToolWithRequiredParams(model: LLModel) = runTest(timeout = 300.seconds) {
225262
Models.assumeAvailable(model.provider)
226263
assumeTrue(model.capabilities.contains(LLMCapability.Tools), "Model $model does not support tools")
@@ -780,7 +817,7 @@ abstract class ExecutorIntegrationTestBase {
780817

781818
val prompt = calculatorPrompt
782819

783-
/** tool choice auto is default and thus is tested by [integration_testToolWithRequiredParams] */
820+
/* tool choice auto is default and thus is tested by [integration_testToolWithRequiredParams] */
784821

785822
withRetry(times = 3, testName = "integration_testToolChoiceRequired[${model.id}]") {
786823
with(
@@ -1068,7 +1105,10 @@ abstract class ExecutorIntegrationTestBase {
10681105

10691106
val prompt2 = Prompt(
10701107
id = "reasoning-multistep-2",
1071-
messages = prompt1.messages + response1 + Message.User(ContentPart.Text("Multiply the result by 2."), metaInfo = RequestMetaInfo.Empty),
1108+
messages = prompt1.messages + response1 + Message.User(
1109+
ContentPart.Text("Multiply the result by 2."),
1110+
metaInfo = RequestMetaInfo.Empty
1111+
),
10721112
params = params
10731113
)
10741114

@@ -1080,3 +1120,20 @@ abstract class ExecutorIntegrationTestBase {
10801120
}
10811121
}
10821122
}
1123+
1124+
private suspend fun PromptExecutor.executeStreamAndCollect(
1125+
prompt: Prompt,
1126+
model: LLModel,
1127+
tools: List<ToolDescriptor> = emptyList(),
1128+
appendable: Appendable,
1129+
endMessages: MutableList<StreamFrame.End>,
1130+
toolMessages: MutableList<StreamFrame.ToolCall>
1131+
) {
1132+
this.executeStreaming(prompt, model, tools).collect { frame ->
1133+
when (frame) {
1134+
is StreamFrame.Append -> appendable.append(frame.text)
1135+
is StreamFrame.End -> endMessages.add(frame)
1136+
is StreamFrame.ToolCall -> toolMessages.add(frame)
1137+
}
1138+
}
1139+
}

integration-tests/src/jvmTest/kotlin/ai/koog/integration/tests/executor/MultipleLLMPromptExecutorIntegrationTest.kt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import ai.koog.integration.tests.utils.getLLMClientForProvider
1010
import ai.koog.prompt.executor.llms.MultiLLMPromptExecutor
1111
import ai.koog.prompt.executor.model.PromptExecutor
1212
import ai.koog.prompt.llm.LLModel
13+
import org.junit.jupiter.api.Disabled
1314
import org.junit.jupiter.params.ParameterizedTest
1415
import org.junit.jupiter.params.provider.Arguments
1516
import org.junit.jupiter.params.provider.MethodSource
@@ -113,6 +114,13 @@ class MultipleLLMPromptExecutorIntegrationTest : ExecutorIntegrationTestBase() {
113114
super.integration_testExecuteStreaming(model)
114115
}
115116

117+
@Disabled("KG-616")
118+
@ParameterizedTest
119+
@MethodSource("allCompletionModels")
120+
override fun integration_testExecuteStreamingWithTools(model: LLModel) {
121+
super.integration_testExecuteStreamingWithTools(model)
122+
}
123+
116124
@ParameterizedTest
117125
@MethodSource("allCompletionModels")
118126
override fun integration_testToolWithRequiredParams(model: LLModel) {

integration-tests/src/jvmTest/kotlin/ai/koog/integration/tests/executor/SingleLLMPromptExecutorIntegrationTest.kt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import ai.koog.prompt.executor.llms.SingleLLMPromptExecutor
1111
import ai.koog.prompt.executor.model.PromptExecutor
1212
import ai.koog.prompt.llm.LLMProvider
1313
import ai.koog.prompt.llm.LLModel
14+
import org.junit.jupiter.api.Disabled
1415
import org.junit.jupiter.params.ParameterizedTest
1516
import org.junit.jupiter.params.provider.Arguments
1617
import org.junit.jupiter.params.provider.MethodSource
@@ -107,6 +108,13 @@ class SingleLLMPromptExecutorIntegrationTest : ExecutorIntegrationTestBase() {
107108
super.integration_testExecuteStreaming(model)
108109
}
109110

111+
@Disabled("KG-616")
112+
@ParameterizedTest
113+
@MethodSource("allCompletionModels")
114+
override fun integration_testExecuteStreamingWithTools(model: LLModel) {
115+
super.integration_testExecuteStreamingWithTools(model)
116+
}
117+
110118
@ParameterizedTest
111119
@MethodSource("allCompletionModels")
112120
override fun integration_testToolWithRequiredParams(model: LLModel) {

integration-tests/src/jvmTest/kotlin/ai/koog/integration/tests/utils/Models.kt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ object Models {
1919
return Stream.of(
2020
OpenAIModels.Chat.GPT5_1, // reasoning
2121
OpenAIModels.Chat.GPT4_1, // non-reasoning
22+
OpenAIModels.Chat.GPT5_1Codex
2223
)
2324
}
2425

0 commit comments

Comments (0)