
Commit 06e8e33

Fix integration tests (#1281)
Co-authored-by: Sergey Karpov <[email protected]>
1 parent dd4e76e · commit 06e8e33

14 files changed (+169 additions, -74 deletions)

integration-tests/src/jvmTest/kotlin/ai/koog/integration/tests/agent/AIAgentIntegrationTest.kt

Lines changed: 1 addition & 1 deletion
```diff
@@ -1007,7 +1007,7 @@ class AIAgentIntegrationTest : AIAgentTestBase() {
 
         with(state) {
             withClue("${CalculatorToolNoArgs.descriptor.name} tool should be called for model $model") {
-                actualToolCalls shouldBe listOf(CalculatorToolNoArgs.descriptor.name)
+                actualToolCalls.shouldContain(CalculatorToolNoArgs.descriptor.name)
             }
 
             errors.shouldBeEmpty()
```
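The assertion swaps exact list equality for containment: models sometimes call the tool more than once, and `shouldBe listOf(...)` fails on any extra entry. A minimal Kotest sketch of the difference, with a hypothetical tool-call log:

```kotlin
import io.kotest.matchers.collections.shouldContain

fun main() {
    // Hypothetical log: the model invoked the calculator twice.
    val actualToolCalls = listOf("calculator", "calculator")

    // Exact equality would fail here:
    //   actualToolCalls shouldBe listOf("calculator")   // expected 1 element, got 2

    // Containment passes as long as the expected tool was called at least once.
    actualToolCalls shouldContain "calculator"
}
```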

integration-tests/src/jvmTest/kotlin/ai/koog/integration/tests/capabilities/ModelCapabilitiesIntegrationTest.kt

Lines changed: 11 additions & 7 deletions
```diff
@@ -20,6 +20,7 @@ import ai.koog.prompt.executor.clients.anthropic.AnthropicLLMClient
 import ai.koog.prompt.executor.clients.google.GoogleLLMClient
 import ai.koog.prompt.executor.clients.openai.OpenAIChatParams
 import ai.koog.prompt.executor.clients.openai.OpenAILLMClient
+import ai.koog.prompt.executor.clients.openai.OpenAIModels
 import ai.koog.prompt.executor.clients.openai.OpenAIResponsesParams
 import ai.koog.prompt.executor.llms.all.DefaultMultiLLMPromptExecutor
 import ai.koog.prompt.llm.LLMCapability
@@ -146,7 +147,7 @@ class ModelCapabilitiesIntegrationTest {
             LLMCapability.Tools, LLMCapability.ToolChoice -> {
                 val tools = SimpleCalculatorTool.descriptor
                 val prompt = prompt("cap-tools-positive", params = LLMParams(toolChoice = ToolChoice.Required)) {
-                    system("You are a helpful assistant with a calculator tool. Always use the tool.")
+                    system("You are a helpful assistant.")
                     user("Compute 2 + 3.")
                 }
                 withRetry {
@@ -157,10 +158,7 @@
             }
 
             LLMCapability.Vision.Image -> {
-                val imagePath = getImageFileForScenario(
-                    MediaTestScenarios.ImageTestScenario.BASIC_PNG,
-                    testResourcesDir
-                )
+                val imagePath = testResourcesDir.resolve("basic.jpg")
                 val base64 = Base64.encode(imagePath.readBytes())
                 val prompt = prompt("cap-vision-image-positive") {
                     system("You are a helpful assistant that can describe images.")
@@ -169,8 +167,8 @@
                     image(
                         ContentPart.Image(
                             content = AttachmentContent.Binary.Base64(base64),
-                            format = "png",
-                            mimeType = "image/png"
+                            format = "jpeg",
+                            mimeType = "image/jpeg"
                         )
                     )
                 }
@@ -204,6 +202,12 @@
             }
 
             LLMCapability.Document -> {
+                // KG-620 GPT-5.1-Codex fails to process the text input file
+                assumeTrue(
+                    model != OpenAIModels.Chat.GPT5_1Codex,
+                    "Skipping document capability test for ${model.id}, see KG-620"
+                )
+
                 val file = createTextFileForScenario(
                     MediaTestScenarios.TextTestScenario.BASIC_TEXT,
                     testResourcesDir
```
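The KG-620 guard uses JUnit 5's `assumeTrue`, which aborts a test (reporting it as skipped) rather than failing it when the precondition doesn't hold. A self-contained sketch of the pattern, with hypothetical model ids:

```kotlin
import org.junit.jupiter.api.Assumptions.assumeTrue
import org.junit.jupiter.api.Test

class DocumentCapabilitySketch {
    // Hypothetical set of models with a known, tracked issue.
    private val knownBrokenModels = setOf("gpt-5.1-codex")

    @Test
    fun documentCapability() {
        val modelId = "gpt-4o" // hypothetical model under test

        // If the condition is false, JUnit reports the test as aborted/skipped,
        // so a tracked upstream bug doesn't turn the whole suite red.
        assumeTrue(modelId !in knownBrokenModels, "Skipping for $modelId, see KG-620")

        // ...the real document-processing assertions would follow here
    }
}
```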

integration-tests/src/jvmTest/kotlin/ai/koog/integration/tests/executor/ExecutorIntegrationTestBase.kt

Lines changed: 78 additions & 35 deletions
```diff
@@ -46,6 +46,7 @@ import ai.koog.prompt.executor.clients.google.GoogleModels
 import ai.koog.prompt.executor.clients.google.GoogleParams
 import ai.koog.prompt.executor.clients.google.models.GoogleThinkingConfig
 import ai.koog.prompt.executor.clients.google.models.GoogleThinkingLevel
+import ai.koog.prompt.executor.clients.openai.OpenAIChatParams
 import ai.koog.prompt.executor.clients.openai.OpenAIModels
 import ai.koog.prompt.executor.clients.openai.OpenAIResponsesParams
 import ai.koog.prompt.executor.clients.openai.base.models.ReasoningEffort
@@ -100,6 +101,8 @@ import kotlinx.io.files.Path as KtPath
 
 abstract class ExecutorIntegrationTestBase {
     private val testScope = TestScope()
+    private val basicLimit = 256
+    private val extendedLimit = 512
 
     @AfterEach
     fun cleanup() {
@@ -130,46 +133,76 @@
             is LLMProvider.OpenAI -> OpenAIResponsesParams(
                 reasoning = ReasoningConfig(
                     effort = ReasoningEffort.MEDIUM,
-                    summary = ReasoningSummary.DETAILED
+                    summary = ReasoningSummary.AUTO
                 ),
                 include = listOf(OpenAIInclude.REASONING_ENCRYPTED_CONTENT),
-                maxTokens = 256
+                maxTokens = basicLimit
             )
 
             is LLMProvider.Google -> {
                 val thinkingConfig = if (model.id == GoogleModels.Gemini3_Pro_Preview.id) {
                     GoogleThinkingConfig(
                         includeThoughts = true,
-                        thinkingLevel = GoogleThinkingLevel.LOW // with HIGH thoughts often exceed maxTokens causing test failures
+                        thinkingLevel = GoogleThinkingLevel.HIGH
                     )
                 } else {
                     GoogleThinkingConfig(
                         includeThoughts = true,
-                        thinkingBudget = 256
+                        // Slightly higher limit to avoid truncation in multi-step reasoning tests
+                        thinkingBudget = extendedLimit
                     )
                 }
                 GoogleParams(
                     thinkingConfig = thinkingConfig,
-                    maxTokens = 256
+                    // Slightly higher limit to avoid truncation in multi-step reasoning tests
+                    maxTokens = extendedLimit
                 )
             }
 
-            else -> LLMParams(maxTokens = 256)
+            else -> LLMParams(maxTokens = basicLimit)
         }
     }
 
+    private fun createNoReasoningParams(model: LLModel): LLMParams = when (model.provider) {
+        is LLMProvider.Anthropic -> AnthropicParams(
+            thinking = AnthropicThinking.Disabled()
+        )
+
+        is LLMProvider.OpenAI ->
+            if (model.capabilities.contains(LLMCapability.OpenAIEndpoint.Responses)) {
+                OpenAIResponsesParams(
+                    maxTokens = basicLimit
+                )
+            } else {
+                OpenAIChatParams(
+                    maxTokens = basicLimit
+                )
+            }
+
+        is LLMProvider.Google ->
+            GoogleParams(
+                thinkingConfig = GoogleThinkingConfig(
+                    includeThoughts = false,
+                ),
+                // Slightly higher limit to avoid truncation in multi-step reasoning tests
+                maxTokens = extendedLimit
+            )
+
+        else -> LLMParams(maxTokens = basicLimit)
+    }
+
     open fun integration_testExecute(model: LLModel) = runTest(timeout = 300.seconds) {
         Models.assumeAvailable(model.provider)
 
-        val prompt = Prompt.build("test-prompt") {
+        val prompt = Prompt.build("test-prompt", createNoReasoningParams(model)) {
             system("You are a helpful assistant.")
             user("What is the capital of France?")
         }
 
         withRetry(times = 3, testName = "integration_testExecute[${model.id}]") {
             getExecutor(model).execute(prompt, model) shouldNotBeNull {
                 shouldNotBeEmpty()
-                with(shouldForAny { it is Message.Assistant }.first()) {
+                filterIsInstance<Message.Assistant>().firstOrNull().shouldNotBeNull {
                     content.lowercase().shouldContain("paris")
                     with(metaInfo) {
                         inputTokensCount.shouldNotBeNull()
```
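The `integration_testExecute` change fixes a subtle selection bug: with reasoning enabled, the first message in a response is often a `Message.Reasoning`, so taking `.first()` from the raw list picked the wrong element. Filtering by type first makes the check deterministic. A minimal sketch with simplified stand-in types (not the real koog hierarchy):

```kotlin
import io.kotest.matchers.nulls.shouldNotBeNull
import io.kotest.matchers.string.shouldContain

// Simplified stand-ins for the real Message hierarchy.
sealed interface Msg { val content: String }
data class Reasoning(override val content: String) : Msg
data class Assistant(override val content: String) : Msg

fun main() {
    // With reasoning enabled, the assistant reply is rarely the first element.
    val response: List<Msg> = listOf(Reasoning("thinking..."), Assistant("Paris is the capital."))

    // response.first() would return the Reasoning message here; filtering by
    // type picks the actual assistant answer regardless of ordering.
    val answer = response.filterIsInstance<Assistant>().firstOrNull().shouldNotBeNull()
    answer.content.lowercase() shouldContain "paris"
}
```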
```diff
@@ -648,9 +681,11 @@
         }
 
         withRetry {
-            with(getExecutor(model).execute(prompt, model).single()) {
+            with(
+                getExecutor(model).execute(prompt, model)
+                    .first { it is Message.Assistant && it.content.isNotBlank() }
+            ) {
                 checkExecutorMediaResponse(this)
-                content.shouldContain("image")
             }
         }
     }
@@ -665,7 +700,7 @@
         )
 
         val imageUrl =
-            "https://upload.wikimedia.org/wikipedia/commons/thumb/c/c3/Python-logo-notext.svg/1200px-Python-logo-notext.svg.png"
+            "https://upload.wikimedia.org/wikipedia/commons/thumb/6/6a/PNG_Test.png/200px-PNG_Test.png"
 
         val prompt = prompt("url-based-attachments-test") {
             system("You are a helpful assistant that can analyze images.")
@@ -683,8 +718,9 @@
             with(getExecutor(model).execute(prompt, model).single()) {
                 checkExecutorMediaResponse(this)
                 content.lowercase()
-                    .shouldContain("python")
-                    .shouldContain("logo")
+                    .shouldContain("image")
+                    .shouldContain("test")
+                    .shouldContain("hat")
             }
         }
     }
```
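The replacement image gives the model literal, stable words to mention (judging by the new assertions, the test card contains the words "test" and "image" and depicts a hat), which is more robust than logo recognition. The chained calls work because Kotest's non-infix string matchers return their receiver. A small sketch with a hypothetical response:

```kotlin
import io.kotest.matchers.string.shouldContain

fun main() {
    // Hypothetical model reply to the test-card image.
    val content = "this test image shows a hat on a transparent background"

    // Each non-infix matcher returns its receiver, so the calls chain.
    content
        .shouldContain("image")
        .shouldContain("test")
        .shouldContain("hat")
}
```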
```diff
@@ -885,13 +921,16 @@
     open fun integration_testMultipleSystemMessages(model: LLModel) = runTest(timeout = 300.seconds) {
         Models.assumeAvailable(model.provider)
 
-        val prompt = prompt("multiple-system-messages-test") {
+        val prompt = prompt("multiple-system-messages-test", createNoReasoningParams(model)) {
             system("You are a helpful assistant.")
             user("Hi")
             system("You can handle multiple system messages.")
             user("Respond with a short message.")
         }
-        getLLMClient(model).execute(prompt, model).single().role shouldBe Message.Role.Assistant
+        with(getLLMClient(model).execute(prompt, model)) {
+            shouldNotBeEmpty()
+            shouldForAny { it is Message.Assistant }
+        }
     }
 
     open fun integration_testSingleMessageModeration(model: LLModel) = runTest(timeout = 300.seconds) {
```
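The test no longer insists that the response is a single assistant message, since providers may interleave reasoning or other roles. Kotest's `shouldForAny` passes if at least one element satisfies the assertion block. A tiny sketch with hypothetical roles:

```kotlin
import io.kotest.inspectors.shouldForAny
import io.kotest.matchers.collections.shouldNotBeEmpty
import io.kotest.matchers.shouldBe

fun main() {
    // Hypothetical response: a reasoning message followed by the actual reply.
    val roles = listOf("reasoning", "assistant")

    roles.shouldNotBeEmpty()
    // Passes because at least one element satisfies the assertion block.
    roles.shouldForAny { it shouldBe "assistant" }
}
```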
```diff
@@ -1016,7 +1055,8 @@
             getLLMClient(model).execute(prompt, model) shouldNotBeNull {
                 shouldNotBeEmpty()
                 withClue("No reasoning messages found") { shouldForAny { it is Message.Reasoning } }
-                assertResponseContainsReasoning(this)
+                // Some Google models don't provide meta info
+                assertResponseContainsReasoning(this, model.provider != LLMProvider.Google)
             }
         }
     }
```
```diff
@@ -1079,18 +1119,38 @@
         withRetry(times = 3, testName = "integration_testReasoningMultiStep_Turn2[${model.id}]") {
             val response2 = client.execute(prompt2, model)
             response2.shouldNotBeEmpty()
-            val answer = response2.filterIsInstance<Message.Assistant>().first().content
+            val answer = response2.filter { it is Message.Assistant || it is Message.Reasoning }
+                .joinToString("") { it.content }
             answer.shouldContain("20")
         }
     }
 
     open fun integration_testExecuteStreamingWithTools(model: LLModel) = runTest(timeout = 300.seconds) {
         Models.assumeAvailable(model.provider)
         assumeTrue(model.capabilities.contains(LLMCapability.Tools), "Model $model does not support tools")
+        assumeTrue(
+            model.provider !== LLMProvider.OpenRouter,
+            "KG-626 Error from OpenRouter on streaming with a tool call"
+        )
+        assumeTrue(
+            model.provider !== LLMProvider.Bedrock,
+            "KG-627 Error from Bedrock executor on streaming with a tool call"
+        )
 
         val executor = getExecutor(model)
 
-        val prompt = Prompt.build("test-streaming", LLMParams(toolChoice = ToolChoice.Required)) {
+        val params = when (model.provider) {
+            LLMProvider.OpenAI ->
+                if (model.capabilities.contains(LLMCapability.OpenAIEndpoint.Responses)) {
+                    OpenAIResponsesParams(toolChoice = ToolChoice.Required)
+                } else {
+                    OpenAIChatParams(toolChoice = ToolChoice.Required)
+                }
+
+            else -> LLMParams(toolChoice = ToolChoice.Required)
+        }
+
+        val prompt = Prompt.build("test-streaming", params) {
             system("You are a helpful assistant.")
             user("Count three times five")
         }
```
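Tool-choice parameters now need provider- and endpoint-specific types, so the test builds them with a `when` over the provider. The shape of that dispatch, reduced to stand-in types (all names below are illustrative, not koog API):

```kotlin
// Illustrative stand-ins; the real parameter classes appear in the diff above.
sealed interface Params { val toolChoice: String }
data class ResponsesParams(override val toolChoice: String) : Params
data class ChatParams(override val toolChoice: String) : Params
data class GenericParams(override val toolChoice: String) : Params

enum class Capability { RESPONSES_ENDPOINT }
data class Model(val provider: String, val capabilities: Set<Capability>)

// OpenAI models get endpoint-specific parameters; other providers
// fall through to the generic type, mirroring the test's logic.
fun paramsFor(model: Model): Params = when (model.provider) {
    "openai" ->
        if (Capability.RESPONSES_ENDPOINT in model.capabilities) {
            ResponsesParams(toolChoice = "required")
        } else {
            ChatParams(toolChoice = "required")
        }

    else -> GenericParams(toolChoice = "required")
}

fun main() {
    val model = Model("openai", setOf(Capability.RESPONSES_ENDPOINT))
    check(paramsFor(model) is ResponsesParams)
}
```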
```diff
@@ -1119,23 +1179,6 @@
             }
         }
     }
-
-    private suspend fun PromptExecutor.executeStreamAndCollect(
-        prompt: Prompt,
-        model: LLModel,
-        tools: List<ToolDescriptor>,
-        appendable: StringBuilder,
-        endMessages: MutableList<StreamFrame.End>,
-        toolMessages: MutableList<StreamFrame.ToolCall>
-    ) {
-        executeStreaming(prompt, model, tools).collect { frame ->
-            when (frame) {
-                is StreamFrame.Append -> appendable.append(frame.text)
-                is StreamFrame.ToolCall -> toolMessages.add(frame)
-                is StreamFrame.End -> endMessages.add(frame)
-            }
-        }
-    }
 }
 
 private suspend fun PromptExecutor.executeStreamAndCollect(
```
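The deleted block was a duplicate of the top-level `executeStreamAndCollect` helper that survives below it; the logic is unchanged. For reference, the frame-sorting pattern it implements, minus the Flow plumbing and with stand-in types:

```kotlin
// Stand-ins for the StreamFrame hierarchy used by the helper.
sealed interface Frame {
    data class Append(val text: String) : Frame
    data class ToolCall(val name: String) : Frame
    data class End(val reason: String?) : Frame
}

fun main() {
    // A hypothetical streamed response: one tool call, some text, then the end frame.
    val frames = listOf(Frame.ToolCall("calculator"), Frame.Append("15"), Frame.End("stop"))

    val text = StringBuilder()
    val toolCalls = mutableListOf<Frame.ToolCall>()
    val ends = mutableListOf<Frame.End>()

    // Exhaustive dispatch: every frame lands in exactly one bucket,
    // so assertions can inspect text, tool calls, and end frames separately.
    for (frame in frames) {
        when (frame) {
            is Frame.Append -> text.append(frame.text)
            is Frame.ToolCall -> toolCalls.add(frame)
            is Frame.End -> ends.add(frame)
        }
    }

    check(toolCalls.single().name == "calculator")
    check(text.toString() == "15")
}
```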

integration-tests/src/jvmTest/kotlin/ai/koog/integration/tests/executor/OllamaExecutorIntegrationTest.kt

Lines changed: 1 addition & 1 deletion
```diff
@@ -296,7 +296,7 @@ class OllamaExecutorIntegrationTest : ExecutorIntegrationTestBase() {
 
         when (scenario) {
             ImageTestScenario.BASIC_PNG, ImageTestScenario.BASIC_JPG,
-            ImageTestScenario.SMALL_IMAGE, ImageTestScenario.LARGE_IMAGE_ANTHROPIC -> {
+            ImageTestScenario.LARGE_IMAGE_ANTHROPIC -> {
                checkExecutorMediaResponse(response)
                response.content.shouldNotBeBlank()
            }
```

integration-tests/src/jvmTest/kotlin/ai/koog/integration/tests/executor/ToolSchemaExecutorIntegrationTest.kt

Lines changed: 2 additions & 2 deletions
```diff
@@ -76,7 +76,7 @@ class ToolSchemaExecutorIntegrationTest {
         ),
         "Invalid 'tools[0].function.name': empty string. Expected a string with minimum length 1, but got an empty string instead."
     ),
-    // Todo uncomment when KG-185 is fixed
+    // Uncomment when KG-185 is fixed
     /*Arguments.of(
         ToolDescriptor(
             name = "test_tool",
@@ -154,7 +154,7 @@
             shouldNotBeEmpty()
             with(Json.decodeFromString<FileOperation>(joinToString("\n") { it.content })) {
                 filePath shouldBe "hello.txt"
-                content shouldBe "Hello, World!"
+                content.trim() shouldBe "Hello, World!"
             }
         }
     }
```
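Trimming before the equality check keeps the assertion focused on the payload: models frequently emit a trailing newline in generated file content, which would otherwise fail an exact `shouldBe`. A one-liner sketch:

```kotlin
import io.kotest.matchers.shouldBe

fun main() {
    // Hypothetical model output with a trailing newline.
    val content = "Hello, World!\n"

    // content shouldBe "Hello, World!"  // would fail on the trailing '\n'
    content.trim() shouldBe "Hello, World!"
}
```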

integration-tests/src/jvmTest/kotlin/ai/koog/integration/tests/mcp/McpServerTest.kt

Lines changed: 1 addition & 1 deletion
```diff
@@ -35,7 +35,7 @@ class McpServerTest {
         @JvmStatic
         fun getModels() = listOf(
             OpenAIModels.Chat.GPT4o,
-            // ToDo enable when fixed: KG-588 singleRunStrategy outputs empty response when using an MCP server
+            // Enable when fixed: KG-588 singleRunStrategy outputs empty response when using an MCP server
             // GoogleModels.Gemini2_5FlashLite
         )
     }
```

integration-tests/src/jvmTest/kotlin/ai/koog/integration/tests/utils/MediaTestScenarios.kt

Lines changed: 1 addition & 2 deletions
```diff
@@ -15,7 +15,6 @@ object MediaTestScenarios {
         CORRUPTED_IMAGE,
         LARGE_IMAGE, // 20 MB for Gemini and OpenAI, 5 MB for Anthropic
         LARGE_IMAGE_ANTHROPIC, // 20 MB for Gemini and OpenAI, 5 MB for Anthropic
-        SMALL_IMAGE // 1x1 pixel
     }
 
     enum class TextTestScenario {
@@ -63,7 +62,7 @@ object MediaTestScenarios {
     val models = listOf(
         AnthropicModels.Sonnet_4_5,
         GoogleModels.Gemini2_5Pro,
-        OpenAIModels.Chat.GPT5_2,
+        OpenAIModels.Chat.GPT5_1,
     )
 
     @JvmStatic
```

integration-tests/src/jvmTest/kotlin/ai/koog/integration/tests/utils/MediaTestUtils.kt

Lines changed: 0 additions & 4 deletions
```diff
@@ -34,10 +34,6 @@ object MediaTestUtils {
         MediaTestScenarios.ImageTestScenario.LARGE_IMAGE_ANTHROPIC -> {
             testResourcesDir.resolve("large_5.jpg")
         }
-
-        MediaTestScenarios.ImageTestScenario.SMALL_IMAGE -> {
-            testResourcesDir.resolve("small.png")
-        }
     }
 }
```

integration-tests/src/jvmTest/kotlin/ai/koog/integration/tests/utils/Models.kt

Lines changed: 2 additions & 1 deletion
```diff
@@ -103,7 +103,8 @@ object Models {
     @JvmStatic
     fun reasoningCapableModels(): Stream<LLModel> {
         return Stream.of(
-            OpenAIModels.Chat.GPT5_2,
+            // Replaced 5.2 with 5.1-Codex because of the unstable 5.2 behaviour, see KG-625
+            OpenAIModels.Chat.GPT5_1Codex,
             AnthropicModels.Haiku_4_5,
             GoogleModels.Gemini2_5Pro,
             GoogleModels.Gemini3_Pro_Preview,
```
