Skip to content

Commit ea6cc04

Browse files
authored
Add multi-LLM support with configurable compression model and file logging (#1394)
This PR adds support for using different AI models for different tasks: Claude Sonnet 4.5 for the main agent work and a cheaper model (GPT-4.1-mini) for compressing conversation history. It also adds file logging to track what the agent is doing.

---

#### Type of the changes

- [ ] New feature (non-breaking change which adds functionality)
- [ ] Bug fix (non-breaking change which fixes an issue)
- [ ] Breaking change (fix or feature that would cause existing functionality to change)
- [ ] Documentation update
- [ ] Tests improvement
- [x] Refactoring
- [ ] CI/CD changes
- [ ] Dependencies update

#### Checklist

- [x] The pull request has a description of the proposed change
- [x] I read the [Contributing Guidelines](https://github.com/JetBrains/koog/blob/main/CONTRIBUTING.md) before opening the pull request
- [x] The pull request uses **`develop`** as the base branch
- [ ] Tests for the changes have been added
- [x] All new and existing tests passed

##### Additional steps for pull requests adding a new feature

- [ ] An issue describing the proposed change exists
- [ ] The pull request includes a link to the issue
- [ ] The change was discussed and approved in the issue
- [ ] Docs have been added / updated
1 parent 5f76fbd commit ea6cc04

File tree

6 files changed

+64
-15
lines changed

6 files changed

+64
-15
lines changed

agents/agents-core/src/commonMain/kotlin/ai/koog/agents/ext/agent/SingleRunStrategyWithHistoryCompression.kt

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import ai.koog.agents.core.dsl.extension.onMultipleToolCalls
1818
import ai.koog.agents.core.dsl.extension.onToolCall
1919
import ai.koog.agents.core.environment.ReceivedToolResult
2020
import ai.koog.prompt.dsl.Prompt
21+
import ai.koog.prompt.llm.LLModel
2122
import ai.koog.prompt.message.Message
2223

2324
/**
@@ -27,10 +28,12 @@ import ai.koog.prompt.message.Message
2728
* when the message count or token size exceeds a threshold
2829
* @property compressionStrategy [HistoryCompressionStrategy] implementation that defines
2930
* how to compress the conversation history
31+
* @property retrievalModel Optional [LLModel] to use for compression (defaults to agent's model)
3032
*/
3133
public data class HistoryCompressionConfig(
3234
val isHistoryTooBig: (Prompt) -> Boolean,
33-
val compressionStrategy: HistoryCompressionStrategy
35+
val compressionStrategy: HistoryCompressionStrategy,
36+
val retrievalModel: LLModel? = null
3437
)
3538

3639
/**
@@ -40,8 +43,8 @@ public data class HistoryCompressionConfig(
4043
* if the conversation history becomes too large (based on [HistoryCompressionConfig.isHistoryTooBig]),
4144
* it compresses the message list to essential facts before continuing.
4245
*
43-
* @param config specifies when to trigger compression (size threshold) and how to compress
44-
* (fact extraction strategy)
46+
* @param config specifies when to trigger compression (size threshold), how to compress
47+
* (fact extraction strategy), and optionally which model to use for compression
4548
* @param runMode how tools are executed: [ToolCalls.SINGLE_RUN_SEQUENTIAL] (one tool per LLM call),
4649
* [ToolCalls.SEQUENTIAL] (multiple tools per call, executed sequentially), or [ToolCalls.PARALLEL]
4750
* (multiple tools per call, executed concurrently)
@@ -64,7 +67,10 @@ private fun singleRunWithHistoryCompressionParallelAbility(
6467
val nodeCallLLM by nodeLLMRequestMultiple()
6568
val nodeExecuteTool by nodeExecuteMultipleTools(parallelTools = parallelTools)
6669
val nodeSendToolResult by nodeLLMSendMultipleToolResults()
67-
val nodeCompressHistory by nodeLLMCompressHistory<List<ReceivedToolResult>>(strategy = config.compressionStrategy)
70+
val nodeCompressHistory by nodeLLMCompressHistory<List<ReceivedToolResult>>(
71+
strategy = config.compressionStrategy,
72+
retrievalModel = config.retrievalModel
73+
)
6874
val nodeSendCompressedHistory by node<List<ReceivedToolResult>, List<Message.Response>> {
6975
llm.writeSession {
7076
requestLLMMultiple()
@@ -104,7 +110,10 @@ private fun singleRunWithHistoryCompressionModeStrategy(config: HistoryCompressi
104110
val nodeCallLLM by nodeLLMRequest()
105111
val nodeExecuteTool by nodeExecuteTool()
106112
val nodeSendToolResult by nodeLLMSendToolResult()
107-
val compressHistory by nodeLLMCompressHistory<ReceivedToolResult>(strategy = config.compressionStrategy)
113+
val compressHistory by nodeLLMCompressHistory<ReceivedToolResult>(
114+
strategy = config.compressionStrategy,
115+
retrievalModel = config.retrievalModel
116+
)
108117
val nodeSendCompressedHistory by node<ReceivedToolResult, Message.Response> {
109118
llm.writeSession {
110119
requestLLM()

examples/code-agent/step-05-history/README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,14 @@ A code agent with history compression that handles long-running tasks without hi
55
## Prerequisites
66

77
- Java 17+
8-
- OpenAI API key
8+
- Anthropic API key (main agent)
9+
- OpenAI API key (for history compression and find sub-agent)
910
- (Optional) Langfuse credentials for observability
1011

1112
## Setup
1213

1314
```bash
15+
export ANTHROPIC_API_KEY=your_anthropic_key
1416
export OPENAI_API_KEY=your_openai_key
1517
```
1618

examples/code-agent/step-05-history/src/main/kotlin/CodeAgentHistoryCompressionConfig.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ val CODE_AGENT_HISTORY_TOO_BIG: (Prompt) -> Boolean = { prompt ->
1616
* Extracts key facts from conversation history.
1717
* LLM answers these questions, and the answers become the compressed history.
1818
*/
19-
val CODE_AGENT_COMPRESSION = RetrieveFactsFromHistory(
19+
val CODE_AGENT_COMPRESSION_STRATEGY = RetrieveFactsFromHistory(
2020
Concept(
2121
"project-structure",
2222
"What is the structure of this project?",

examples/code-agent/step-05-history/src/main/kotlin/Main.kt

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,22 @@ import ai.koog.agents.ext.tool.shell.ExecuteShellCommandTool
1212
import ai.koog.agents.ext.tool.shell.JvmShellCommandExecutor
1313
import ai.koog.agents.ext.tool.shell.PrintShellCommandConfirmationHandler
1414
import ai.koog.agents.ext.tool.shell.ShellCommandConfirmation
15+
import ai.koog.prompt.executor.clients.anthropic.AnthropicLLMClient
16+
import ai.koog.prompt.executor.clients.anthropic.AnthropicModels
17+
import ai.koog.prompt.executor.clients.openai.OpenAILLMClient
1518
import ai.koog.prompt.executor.clients.openai.OpenAIModels
16-
import ai.koog.prompt.executor.llms.all.simpleOpenAIExecutor
19+
import ai.koog.prompt.executor.llms.MultiLLMPromptExecutor
20+
import ai.koog.prompt.llm.LLMProvider
1721
import ai.koog.rag.base.files.JVMFileSystemProvider
1822

19-
val executor = simpleOpenAIExecutor(System.getenv("OPENAI_API_KEY"))
23+
val multiExecutor = MultiLLMPromptExecutor(
24+
LLMProvider.Anthropic to AnthropicLLMClient(System.getenv("ANTHROPIC_API_KEY")),
25+
LLMProvider.OpenAI to OpenAILLMClient(System.getenv("OPENAI_API_KEY"))
26+
)
27+
2028
val agent = AIAgent(
21-
promptExecutor = executor,
22-
llmModel = OpenAIModels.Chat.GPT5Codex,
29+
promptExecutor = multiExecutor,
30+
llmModel = AnthropicModels.Sonnet_4_5,
2331
toolRegistry = ToolRegistry {
2432
tool(ListDirectoryTool(JVMFileSystemProvider.ReadOnly))
2533
tool(ReadFileTool(JVMFileSystemProvider.ReadOnly))
@@ -43,7 +51,8 @@ val agent = AIAgent(
4351
strategy = singleRunStrategyWithHistoryCompression(
4452
config = HistoryCompressionConfig(
4553
isHistoryTooBig = CODE_AGENT_HISTORY_TOO_BIG,
46-
compressionStrategy = CODE_AGENT_COMPRESSION
54+
compressionStrategy = CODE_AGENT_COMPRESSION_STRATEGY,
55+
retrievalModel = OpenAIModels.Chat.GPT4_1Mini
4756
)
4857
),
4958
maxIterations = 400
@@ -72,6 +81,6 @@ suspend fun main(args: Array<String>) {
7281
val result = agent.run(input)
7382
println(result)
7483
} finally {
75-
executor.close()
84+
multiExecutor.close()
7685
}
7786
}

examples/code-agent/step-05-history/src/main/kotlin/Observability.kt

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@ import ai.koog.agents.features.eventHandler.feature.handleEvents
55
import ai.koog.agents.features.opentelemetry.attribute.CustomAttribute
66
import ai.koog.agents.features.opentelemetry.feature.OpenTelemetry
77
import ai.koog.agents.features.opentelemetry.integration.langfuse.addLangfuseExporter
8+
import io.github.oshai.kotlinlogging.KotlinLogging
9+
10+
private val logger = KotlinLogging.logger("code-agent-events")
811

912
/**
1013
* Extracted observability setup used by agents in this module.
@@ -21,7 +24,19 @@ fun GraphAIAgent.FeatureContext.setupObservability(agentName: String) {
2124
}
2225
handleEvents {
2326
onToolCallStarting { ctx ->
24-
println("[$agentName] Tool '${ctx.toolName}' called with args: ${ctx.toolArgs.toString().take(100)}")
27+
logger.info { "[$agentName] Tool '${ctx.toolName}' called with args: ${ctx.toolArgs.toString().take(100)}" }
28+
}
29+
onNodeExecutionStarting { ctx ->
30+
if (ctx.node.name == "compressHistory") {
31+
val messages = ctx.context.llm.prompt.messages
32+
logger.info { "[$agentName] Pre-compression: ${messages.size} msgs, ${messages.sumOf { it.content.length }} chars" }
33+
}
34+
}
35+
onNodeExecutionCompleted { ctx ->
36+
if (ctx.node.name == "compressHistory") {
37+
val messages = ctx.context.llm.prompt.messages
38+
logger.info { "[$agentName] Post-compression: ${messages.size} msgs, ${messages.sumOf { it.content.length }} chars" }
39+
}
2540
}
2641
}
2742
}

examples/code-agent/step-05-history/src/main/resources/logback.xml

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,21 @@
55
</encoder>
66
</appender>
77

8-
<root level="ERROR">
8+
<appender name="FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
9+
<file>${user.dir}/logs/code-agent.log</file>
10+
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
11+
<fileNamePattern>${user.dir}/logs/code-agent.%d{yyyy-MM-dd}.log</fileNamePattern>
12+
<maxHistory>30</maxHistory>
13+
<totalSizeCap>3GB</totalSizeCap>
14+
<cleanHistoryOnStart>true</cleanHistoryOnStart>
15+
</rollingPolicy>
16+
<encoder>
17+
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
18+
</encoder>
19+
</appender>
20+
21+
<root level="INFO">
922
<appender-ref ref="STDOUT" />
23+
<appender-ref ref="FILE" />
1024
</root>
1125
</configuration>

0 commit comments

Comments
 (0)