@@ -46,6 +46,7 @@ import ai.koog.prompt.executor.clients.google.GoogleModels
4646import ai.koog.prompt.executor.clients.google.GoogleParams
4747import ai.koog.prompt.executor.clients.google.models.GoogleThinkingConfig
4848import ai.koog.prompt.executor.clients.google.models.GoogleThinkingLevel
49+ import ai.koog.prompt.executor.clients.openai.OpenAIChatParams
4950import ai.koog.prompt.executor.clients.openai.OpenAIModels
5051import ai.koog.prompt.executor.clients.openai.OpenAIResponsesParams
5152import ai.koog.prompt.executor.clients.openai.base.models.ReasoningEffort
@@ -100,6 +101,8 @@ import kotlinx.io.files.Path as KtPath
100101
101102abstract class ExecutorIntegrationTestBase {
102103 private val testScope = TestScope ()
104+ private val basicLimit = 256
105+ private val extendedLimit = 512
103106
104107 @AfterEach
105108 fun cleanup () {
@@ -130,46 +133,76 @@ abstract class ExecutorIntegrationTestBase {
130133 is LLMProvider .OpenAI -> OpenAIResponsesParams (
131134 reasoning = ReasoningConfig (
132135 effort = ReasoningEffort .MEDIUM ,
133- summary = ReasoningSummary .DETAILED
136+ summary = ReasoningSummary .AUTO
134137 ),
135138 include = listOf (OpenAIInclude .REASONING_ENCRYPTED_CONTENT ),
136- maxTokens = 256
139+ maxTokens = basicLimit
137140 )
138141
139142 is LLMProvider .Google -> {
140143 val thinkingConfig = if (model.id == GoogleModels .Gemini3_Pro_Preview .id) {
141144 GoogleThinkingConfig (
142145 includeThoughts = true ,
143- thinkingLevel = GoogleThinkingLevel .LOW // with HIGH thoughts often exceed maxTokens causing test failures
146+ thinkingLevel = GoogleThinkingLevel .HIGH
144147 )
145148 } else {
146149 GoogleThinkingConfig (
147150 includeThoughts = true ,
148- thinkingBudget = 256
151+ // Slightly higher limit to avoid truncation in multi-step reasoning tests
152+ thinkingBudget = extendedLimit
149153 )
150154 }
151155 GoogleParams (
152156 thinkingConfig = thinkingConfig,
153- maxTokens = 256
157+ // Slightly higher limit to avoid truncation in multi-step reasoning tests
158+ maxTokens = extendedLimit
154159 )
155160 }
156161
157- else -> LLMParams (maxTokens = 256 )
162+ else -> LLMParams (maxTokens = basicLimit )
158163 }
159164 }
160165
166+ private fun createNoReasoningParams (model : LLModel ): LLMParams = when (model.provider) {
167+ is LLMProvider .Anthropic -> AnthropicParams (
168+ thinking = AnthropicThinking .Disabled ()
169+ )
170+
171+ is LLMProvider .OpenAI ->
172+ if (model.capabilities.contains(LLMCapability .OpenAIEndpoint .Responses )) {
173+ OpenAIResponsesParams (
174+ maxTokens = basicLimit
175+ )
176+ } else {
177+ OpenAIChatParams (
178+ maxTokens = basicLimit
179+ )
180+ }
181+
182+ is LLMProvider .Google ->
183+ GoogleParams (
184+ thinkingConfig = GoogleThinkingConfig (
185+ includeThoughts = false ,
186+ ),
187+ // Slightly higher limit to avoid truncation in multi-step reasoning tests
188+ maxTokens = extendedLimit
189+ )
190+
191+ else -> LLMParams (maxTokens = basicLimit)
192+ }
193+
161194 open fun integration_testExecute (model : LLModel ) = runTest(timeout = 300 .seconds) {
162195 Models .assumeAvailable(model.provider)
163196
164- val prompt = Prompt .build(" test-prompt" ) {
197+ val prompt = Prompt .build(" test-prompt" , createNoReasoningParams(model) ) {
165198 system(" You are a helpful assistant." )
166199 user(" What is the capital of France?" )
167200 }
168201
169202 withRetry(times = 3 , testName = " integration_testExecute[${model.id} ]" ) {
170203 getExecutor(model).execute(prompt, model) shouldNotBeNull {
171204 shouldNotBeEmpty()
172- with (shouldForAny { it is Message .Assistant }.first()) {
205+ filterIsInstance< Message .Assistant >().firstOrNull().shouldNotBeNull {
173206 content.lowercase().shouldContain(" paris" )
174207 with (metaInfo) {
175208 inputTokensCount.shouldNotBeNull()
@@ -648,9 +681,11 @@ abstract class ExecutorIntegrationTestBase {
648681 }
649682
650683 withRetry {
651- with (getExecutor(model).execute(prompt, model).single()) {
684+ with (
685+ getExecutor(model).execute(prompt, model)
686+ .first { it is Message .Assistant && it.content.isNotBlank() }
687+ ) {
652688 checkExecutorMediaResponse(this )
653- content.shouldContain(" image" )
654689 }
655690 }
656691 }
@@ -665,7 +700,7 @@ abstract class ExecutorIntegrationTestBase {
665700 )
666701
667702 val imageUrl =
668- " https://upload.wikimedia.org/wikipedia/commons/thumb/c/c3/Python-logo-notext.svg/1200px-Python-logo-notext.svg .png"
703+ " https://upload.wikimedia.org/wikipedia/commons/thumb/6/6a/PNG_Test.png/200px-PNG_Test .png"
669704
670705 val prompt = prompt(" url-based-attachments-test" ) {
671706 system(" You are a helpful assistant that can analyze images." )
@@ -683,8 +718,9 @@ abstract class ExecutorIntegrationTestBase {
683718 with (getExecutor(model).execute(prompt, model).single()) {
684719 checkExecutorMediaResponse(this )
685720 content.lowercase()
686- .shouldContain(" python" )
687- .shouldContain(" logo" )
721+ .shouldContain(" image" )
722+ .shouldContain(" test" )
723+ .shouldContain(" hat" )
688724 }
689725 }
690726 }
@@ -885,13 +921,16 @@ abstract class ExecutorIntegrationTestBase {
885921 open fun integration_testMultipleSystemMessages (model : LLModel ) = runTest(timeout = 300 .seconds) {
886922 Models .assumeAvailable(model.provider)
887923
888- val prompt = prompt(" multiple-system-messages-test" ) {
924+ val prompt = prompt(" multiple-system-messages-test" , createNoReasoningParams(model) ) {
889925 system(" You are a helpful assistant." )
890926 user(" Hi" )
891927 system(" You can handle multiple system messages." )
892928 user(" Respond with a short message." )
893929 }
894- getLLMClient(model).execute(prompt, model).single().role shouldBe Message .Role .Assistant
930+ with (getLLMClient(model).execute(prompt, model)) {
931+ shouldNotBeEmpty()
932+ shouldForAny { it is Message .Assistant }
933+ }
895934 }
896935
897936 open fun integration_testSingleMessageModeration (model : LLModel ) = runTest(timeout = 300 .seconds) {
@@ -1016,7 +1055,8 @@ abstract class ExecutorIntegrationTestBase {
10161055 getLLMClient(model).execute(prompt, model) shouldNotBeNull {
10171056 shouldNotBeEmpty()
10181057 withClue(" No reasoning messages found" ) { shouldForAny { it is Message .Reasoning } }
1019- assertResponseContainsReasoning(this )
1058+ // Some Google models aren't providing meta info
1059+ assertResponseContainsReasoning(this , model.provider != LLMProvider .Google )
10201060 }
10211061 }
10221062 }
@@ -1079,18 +1119,38 @@ abstract class ExecutorIntegrationTestBase {
10791119 withRetry(times = 3 , testName = " integration_testReasoningMultiStep_Turn2[${model.id} ]" ) {
10801120 val response2 = client.execute(prompt2, model)
10811121 response2.shouldNotBeEmpty()
1082- val answer = response2.filterIsInstance<Message .Assistant >().first().content
1122+ val answer = response2.filter { it is Message .Assistant || it is Message .Reasoning }
1123+ .joinToString(" " ) { it.content }
10831124 answer.shouldContain(" 20" )
10841125 }
10851126 }
10861127
10871128 open fun integration_testExecuteStreamingWithTools (model : LLModel ) = runTest(timeout = 300 .seconds) {
10881129 Models .assumeAvailable(model.provider)
10891130 assumeTrue(model.capabilities.contains(LLMCapability .Tools ), " Model $model does not support tools" )
1131+ assumeTrue(
1132+ model.provider !== LLMProvider .OpenRouter ,
1133+ " KG-626 Error from OpenRouter on a streaming with a tool call"
1134+ )
1135+ assumeTrue(
1136+ model.provider !== LLMProvider .Bedrock ,
1137+ " KG-627 Error from Bedrock executor on a streaming with a tool call"
1138+ )
10901139
10911140 val executor = getExecutor(model)
10921141
1093- val prompt = Prompt .build(" test-streaming" , LLMParams (toolChoice = ToolChoice .Required )) {
1142+ val params = when (model.provider) {
1143+ LLMProvider .OpenAI ->
1144+ if (model.capabilities.contains(LLMCapability .OpenAIEndpoint .Responses )) {
1145+ OpenAIResponsesParams (toolChoice = ToolChoice .Required )
1146+ } else {
1147+ OpenAIChatParams (toolChoice = ToolChoice .Required )
1148+ }
1149+
1150+ else -> LLMParams (toolChoice = ToolChoice .Required )
1151+ }
1152+
1153+ val prompt = Prompt .build(" test-streaming" , params) {
10941154 system(" You are a helpful assistant." )
10951155 user(" Count three times five" )
10961156 }
@@ -1119,23 +1179,6 @@ abstract class ExecutorIntegrationTestBase {
11191179 }
11201180 }
11211181 }
1122-
1123- private suspend fun PromptExecutor.executeStreamAndCollect (
1124- prompt : Prompt ,
1125- model : LLModel ,
1126- tools : List <ToolDescriptor >,
1127- appendable : StringBuilder ,
1128- endMessages : MutableList <StreamFrame .End >,
1129- toolMessages : MutableList <StreamFrame .ToolCall >
1130- ) {
1131- executeStreaming(prompt, model, tools).collect { frame ->
1132- when (frame) {
1133- is StreamFrame .Append -> appendable.append(frame.text)
1134- is StreamFrame .ToolCall -> toolMessages.add(frame)
1135- is StreamFrame .End -> endMessages.add(frame)
1136- }
1137- }
1138- }
11391182}
11401183
11411184private suspend fun PromptExecutor.executeStreamAndCollect (
0 commit comments