Commit b21ecdc
(KG-212) Introduce ResponseProcessor to fix tool call messages from weak models (#871)
[KG-212](https://youtrack.jetbrains.com/issue/KG-212): Add validation and retry logic for tool calling from local models.

This PR introduces a generic `ResponseProcessor` that can process the messages received from the LLM. A few out-of-the-box implementations aim to fix tool call messages so that the agent follows the expected flow.

## Motivation and Context

Small models (e.g. most Ollama models) often fail to generate tool call messages properly. Typical problems are:

- incorrectly generated JSON
- incorrect tool names or parameter names
- an "I will call this tool" message instead of a direct tool call

`ResponseProcessor` and its implementations are designed to deal with such issues.

## Breaking Changes

None.

---

#### Type of the changes

- [x] New feature (non-breaking change which adds functionality)
- [ ] Bug fix (non-breaking change which fixes an issue)
- [ ] Breaking change (fix or feature that would cause existing functionality to change)
- [ ] Documentation update
- [ ] Tests improvement
- [ ] Refactoring

#### Checklist

- [x] The pull request has a description of the proposed change
- [x] I read the [Contributing Guidelines](https://github.com/JetBrains/koog/blob/main/CONTRIBUTING.md) before opening the pull request
- [x] The pull request uses **`develop`** as the base branch
- [x] Tests for the changes have been added
- [x] All new and existing tests passed

##### Additional steps for pull requests adding a new feature

- [x] An issue describing the proposed change exists
- [x] The pull request includes a link to the issue
- [x] The change was discussed and approved in the issue
- [ ] Docs have been added / updated

---

Co-authored-by: Andrey Bragin <[email protected]>
1 parent c79c00d commit b21ecdc
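For orientation, here is a minimal sketch of how the new optional parameter can be passed through the agent factory shown in the diff below. The concrete out-of-the-box processor implementations added by this PR are not visible in this excerpt, so `toolCallFixer` stands in for whichever `ResponseProcessor` the caller supplies:

```kotlin
import ai.koog.agents.core.agent.AIAgent
import ai.koog.prompt.executor.model.PromptExecutor
import ai.koog.prompt.llm.LLModel
import ai.koog.prompt.processor.ResponseProcessor

// Sketch: wire an arbitrary ResponseProcessor into an agent through the new
// optional parameter. promptExecutor, llmModel and toolCallFixer are supplied
// by the caller; when responseProcessor is omitted it defaults to null and
// the model's responses are used as-is.
fun buildAgent(
    promptExecutor: PromptExecutor,
    llmModel: LLModel,
    toolCallFixer: ResponseProcessor,
) = AIAgent(
    promptExecutor = promptExecutor,
    llmModel = llmModel,
    responseProcessor = toolCallFixer,
)
```

Because the parameter defaults to `null`, existing call sites keep their current behaviour.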

44 files changed: +1,624 −78 lines


agents/agents-core/build.gradle.kts

Lines changed: 1 addition & 0 deletions
@@ -17,6 +17,7 @@ kotlin {
     api(project(":utils"))
     api(project(":prompt:prompt-executor:prompt-executor-model"))
     api(project(":prompt:prompt-llm"))
+    api(project(":prompt:prompt-processor"))
     api(project(":prompt:prompt-structure"))

     api(project(":prompt:prompt-executor:prompt-executor-clients:prompt-executor-openai-client"))

agents/agents-core/src/commonMain/kotlin/ai/koog/agents/core/agent/AIAgent.kt

Lines changed: 9 additions & 0 deletions
@@ -13,6 +13,7 @@ import ai.koog.prompt.dsl.prompt
 import ai.koog.prompt.executor.model.PromptExecutor
 import ai.koog.prompt.llm.LLModel
 import ai.koog.prompt.params.LLMParams
+import ai.koog.prompt.processor.ResponseProcessor
 import ai.koog.utils.io.Closeable
 import kotlinx.datetime.Clock
 import kotlin.reflect.typeOf
@@ -259,6 +260,7 @@ public interface AIAgent<Input, Output> : Closeable {
      *
      * @param promptExecutor The executor responsible for processing language model prompts.
      * @param llmModel The specific large language model to be used for the agent.
+     * @param responseProcessor The processor responsible for processing the model's responses.
      * @param strategy The strategy that defines the agent's workflow, defaulting to the [singleRunStrategy].
      * @param toolRegistry The set of tools available for the agent, defaulting to an empty registry.
      * @param id Unique identifier for the agent. Random UUID will be generated if set to null.
@@ -273,6 +275,7 @@ public interface AIAgent<Input, Output> : Closeable {
         public operator fun invoke(
             promptExecutor: PromptExecutor,
             llmModel: LLModel,
+            responseProcessor: ResponseProcessor? = null,
             strategy: AIAgentGraphStrategy<String, String> = singleRunStrategy(),
             toolRegistry: ToolRegistry = ToolRegistry.EMPTY,
             id: String? = null,
@@ -297,6 +300,7 @@ public interface AIAgent<Input, Output> : Closeable {
                 },
                 model = llmModel,
                 maxAgentIterations = maxIterations,
+                responseProcessor = responseProcessor
             ),
             toolRegistry = toolRegistry,
             installFeatures = installFeatures
@@ -310,6 +314,7 @@ public interface AIAgent<Input, Output> : Closeable {
      * @param promptExecutor An instance of [PromptExecutor] responsible for executing prompts with the language model.
      * @param llmModel The language model [LLModel] to be used by the agent.
      * @param strategy The agent strategy [AIAgentGraphStrategy] defining how the agent processes inputs and outputs.
+     * @param responseProcessor The processor responsible for processing the model's responses.
      * @param toolRegistry An optional [ToolRegistry] specifying the tools available to the agent for execution. Defaults to `[ToolRegistry.EMPTY]`.
      * @param id Unique identifier for the agent. Random UUID will be generated if set to null.
      * @param clock A `Clock` instance used for time-related operations. Defaults to `Clock.System`.
@@ -325,6 +330,7 @@ public interface AIAgent<Input, Output> : Closeable {
             promptExecutor: PromptExecutor,
             llmModel: LLModel,
             strategy: AIAgentGraphStrategy<Input, Output>,
+            responseProcessor: ResponseProcessor? = null,
             toolRegistry: ToolRegistry = ToolRegistry.EMPTY,
             id: String? = null,
             clock: Clock = Clock.System,
@@ -352,6 +358,7 @@ public interface AIAgent<Input, Output> : Closeable {
                 },
                 model = llmModel,
                 maxAgentIterations = maxIterations,
+                responseProcessor = responseProcessor
             ),
             toolRegistry = toolRegistry,
             clock = clock,
@@ -367,6 +374,7 @@ public interface AIAgent<Input, Output> : Closeable {
      * @param Output The type of output produced by the agent.
      * @param promptExecutor The executor used to process prompts for the language model.
      * @param llmModel The language model configuration defining the underlying LLM instance and its behavior.
+     * @param responseProcessor The processor responsible for processing the model's responses.
      * @param toolRegistry Registry containing tools available to the agent for use during execution. Default is an empty registry.
      * @param strategy The strategy to be executed by the agent. Default is a single-run strategy.
      * @param id Unique identifier for the agent. Random UUID will be generated if set to null.
@@ -380,6 +388,7 @@ public interface AIAgent<Input, Output> : Closeable {
         public operator fun <Input, Output> invoke(
             promptExecutor: PromptExecutor,
             llmModel: LLModel,
+            responseProcessor: ResponseProcessor? = null,
             toolRegistry: ToolRegistry = ToolRegistry.EMPTY,
             strategy: AIAgentFunctionalStrategy<Input, Output>,
             id: String? = null,

agents/agents-core/src/commonMain/kotlin/ai/koog/agents/core/agent/AIAgentService.kt

Lines changed: 3 additions & 0 deletions
@@ -11,6 +11,7 @@ import ai.koog.prompt.dsl.prompt
 import ai.koog.prompt.executor.model.PromptExecutor
 import ai.koog.prompt.llm.LLModel
 import ai.koog.prompt.params.LLMParams
+import ai.koog.prompt.processor.ResponseProcessor
 import kotlinx.coroutines.sync.Mutex
 import kotlinx.coroutines.sync.withLock
 import kotlinx.datetime.Clock
@@ -203,6 +204,7 @@ public interface AIAgentService<Input, Output, TAgent : AIAgent<Input, Output>>
     public operator fun invoke(
         promptExecutor: PromptExecutor,
         llmModel: LLModel,
+        responseProcessor: ResponseProcessor? = null,
         strategy: AIAgentGraphStrategy<String, String> = singleRunStrategy(),
         toolRegistry: ToolRegistry = ToolRegistry.EMPTY,
         systemPrompt: String = "",
@@ -225,6 +227,7 @@ public interface AIAgentService<Input, Output, TAgent : AIAgent<Input, Output>>
             },
             model = llmModel,
             maxAgentIterations = maxIterations,
+            responseProcessor = responseProcessor
         ),
         toolRegistry = toolRegistry,
         installFeatures = installFeatures

agents/agents-core/src/commonMain/kotlin/ai/koog/agents/core/agent/FunctionalAIAgent.kt

Lines changed: 1 addition & 0 deletions
@@ -89,6 +89,7 @@ public class FunctionalAIAgent<Input, Output>(
         toolRegistry = toolRegistry,
         prompt = agentConfig.prompt,
         model = agentConfig.model,
+        responseProcessor = agentConfig.responseProcessor,
         promptExecutor = PromptExecutorProxy(
             executor = promptExecutor,
             pipeline = pipeline,

agents/agents-core/src/commonMain/kotlin/ai/koog/agents/core/agent/GraphAIAgent.kt

Lines changed: 1 addition & 0 deletions
@@ -124,6 +124,7 @@ public open class GraphAIAgent<Input, Output>(
         toolRegistry = toolRegistry,
         prompt = agentConfig.prompt,
         model = agentConfig.model,
+        responseProcessor = agentConfig.responseProcessor,
         promptExecutor = PromptExecutorProxy(
             executor = promptExecutor,
             pipeline = pipeline,

agents/agents-core/src/commonMain/kotlin/ai/koog/agents/core/agent/config/AIAgentConfig.kt

Lines changed: 4 additions & 1 deletion
@@ -4,6 +4,7 @@ import ai.koog.prompt.dsl.Prompt
 import ai.koog.prompt.dsl.prompt
 import ai.koog.prompt.executor.clients.openai.OpenAIModels
 import ai.koog.prompt.llm.LLModel
+import ai.koog.prompt.processor.ResponseProcessor

 /**
  * Configuration class for an AI agent that specifies the prompt, execution parameters, and behavior.
@@ -20,13 +21,15 @@ import ai.koog.prompt.llm.LLModel
  * This property provides a mechanism to convert or format missing tool calls and result messages when they occur,
  * typically due to differences in tool sets between steps or subgraphs while the same history is reused.
  * This ensures the prompt remains consistent and readable for the model, even with undefined tools.
+ * @param responseProcessor Optional processor for the agent's responses. If specified, will modify the responses from the llm.
  */
 public class AIAgentConfig(
     override val prompt: Prompt,
     override val model: LLModel,
     public val maxAgentIterations: Int,
     public val missingToolsConversionStrategy: MissingToolsConversionStrategy =
-        MissingToolsConversionStrategy.Missing(ToolCallDescriber.JSON)
+        MissingToolsConversionStrategy.Missing(ToolCallDescriber.JSON),
+    public val responseProcessor: ResponseProcessor? = null,
 ) : AIAgentConfigBase {

     init {
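The same option is available at the configuration level. A hedged sketch of constructing an `AIAgentConfig` with a processor, where `basePrompt`, `model`, and `processor` are placeholders supplied by the caller:

```kotlin
import ai.koog.agents.core.agent.config.AIAgentConfig
import ai.koog.prompt.dsl.Prompt
import ai.koog.prompt.llm.LLModel
import ai.koog.prompt.processor.ResponseProcessor

// Sketch: an AIAgentConfig that post-processes LLM responses.
// missingToolsConversionStrategy keeps its default value; responseProcessor
// defaults to null, in which case responses pass through unchanged.
fun configWithProcessor(
    basePrompt: Prompt,
    model: LLModel,
    processor: ResponseProcessor,
): AIAgentConfig = AIAgentConfig(
    prompt = basePrompt,
    model = model,
    maxAgentIterations = 10,
    responseProcessor = processor,
)
```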

agents/agents-core/src/commonMain/kotlin/ai/koog/agents/core/agent/context/AIAgentLLMContext.kt

Lines changed: 17 additions & 3 deletions
@@ -12,6 +12,7 @@ import ai.koog.agents.core.utils.RWLock
 import ai.koog.prompt.dsl.Prompt
 import ai.koog.prompt.executor.model.PromptExecutor
 import ai.koog.prompt.llm.LLModel
+import ai.koog.prompt.processor.ResponseProcessor
 import kotlinx.datetime.Clock

 /**
@@ -44,6 +45,7 @@ public annotation class DetachedPromptExecutorAPI
  * @property toolRegistry A registry that contains metadata about available tools.
  * @property prompt The current LLM prompt being used or updated in write sessions.
  * @property model The current LLM model being used or updated in write sessions.
+ * @property responseProcessor The current response processor being used or updated in write sessions.
  * @property promptExecutor The [PromptExecutor] responsible for performing operations on the current prompt.
  * @property environment The environment that manages tool execution and interaction with external dependencies.
  * @property clock The clock used for timestamps of messages
@@ -53,6 +55,7 @@ public class AIAgentLLMContext(
     public val toolRegistry: ToolRegistry = ToolRegistry.EMPTY,
     prompt: Prompt,
     model: LLModel,
+    responseProcessor: ResponseProcessor?,
     @property:DetachedPromptExecutorAPI
     public val promptExecutor: PromptExecutor,
     private val environment: AIAgentEnvironment,
@@ -73,6 +76,13 @@ public class AIAgentLLMContext(
     public var model: LLModel = model
         private set

+    /**
+     * Response processor currently associated with this context.
+     */
+    @DetachedPromptExecutorAPI
+    public var responseProcessor: ResponseProcessor? = responseProcessor
+        private set
+
     /**
      * The current prompt used within the `AIAgentLLMContext`.
      *
@@ -106,6 +116,7 @@ public class AIAgentLLMContext(
         toolRegistry: ToolRegistry = this.toolRegistry,
         prompt: Prompt = this.prompt,
         model: LLModel = this.model,
+        responseProcessor: ResponseProcessor? = this.responseProcessor,
         promptExecutor: PromptExecutor = this.promptExecutor,
         environment: AIAgentEnvironment = this.environment,
         config: AIAgentConfig = this.config,
@@ -116,6 +127,7 @@ public class AIAgentLLMContext(
         toolRegistry = toolRegistry,
         prompt = prompt,
         model = model,
+        responseProcessor = responseProcessor,
         promptExecutor = promptExecutor,
         environment = environment,
         config = config,
@@ -132,7 +144,7 @@ public class AIAgentLLMContext(
     @OptIn(ExperimentalStdlibApi::class)
     public suspend fun <T> writeSession(block: suspend AIAgentLLMWriteSession.() -> T): T = rwLock.withWriteLock {
         val session =
-            AIAgentLLMWriteSession(environment, promptExecutor, tools, toolRegistry, prompt, model, config, clock)
+            AIAgentLLMWriteSession(environment, promptExecutor, tools, toolRegistry, prompt, model, responseProcessor, config, clock)

         session.use {
             val result = it.block()
@@ -141,6 +153,7 @@ public class AIAgentLLMContext(
             this.prompt = it.prompt
             this.tools = it.tools
             this.model = it.model
+            this.responseProcessor = it.responseProcessor

             result
         }
@@ -152,7 +165,7 @@ public class AIAgentLLMContext(
      */
     @OptIn(ExperimentalStdlibApi::class)
     public suspend fun <T> readSession(block: suspend AIAgentLLMReadSession.() -> T): T = rwLock.withReadLock {
-        val session = AIAgentLLMReadSession(tools, promptExecutor, prompt, model, config)
+        val session = AIAgentLLMReadSession(tools, promptExecutor, prompt, model, responseProcessor, config)

         session.use { block(it) }
     }
@@ -166,11 +179,12 @@ public class AIAgentLLMContext(
         tools: List<ToolDescriptor> = this.tools,
         prompt: Prompt = this.prompt,
         model: LLModel = this.model,
+        responseProcessor: ResponseProcessor? = this.responseProcessor,
         promptExecutor: PromptExecutor = this.promptExecutor,
         environment: AIAgentEnvironment = this.environment,
         config: AIAgentConfig = this.config,
         clock: Clock = this.clock
     ): AIAgentLLMContext {
-        return AIAgentLLMContext(tools, toolRegistry, prompt, model, promptExecutor, environment, config, clock)
+        return AIAgentLLMContext(tools, toolRegistry, prompt, model, responseProcessor, promptExecutor, environment, config, clock)
     }
 }

agents/agents-core/src/commonMain/kotlin/ai/koog/agents/core/agent/entity/AIAgentSubgraph.kt

Lines changed: 5 additions & 1 deletion
@@ -16,6 +16,7 @@ import ai.koog.agents.core.tools.ToolDescriptor
 import ai.koog.agents.core.tools.annotations.LLMDescription
 import ai.koog.prompt.llm.LLModel
 import ai.koog.prompt.params.LLMParams
+import ai.koog.prompt.processor.ResponseProcessor
 import ai.koog.prompt.structure.StructureFixingParser
 import ai.koog.prompt.structure.StructuredRequest
 import ai.koog.prompt.structure.StructuredRequestConfig
@@ -42,6 +43,7 @@ import kotlin.uuid.Uuid
  * @param toolSelectionStrategy Strategy determining which tools should be available during this subgraph's execution.
  * @param llmModel Optional [LLModel] override for the subgraph execution.
  * @param llmParams Optional [LLMParams] override for the prompt for the subgraph execution.
+ * @param responseProcessor Optional [ResponseProcessor] override for the subgraph execution.
  */
 public open class AIAgentSubgraph<TInput, TOutput>(
     override val name: String,
@@ -50,6 +52,7 @@ public open class AIAgentSubgraph<TInput, TOutput>(
     private val toolSelectionStrategy: ToolSelectionStrategy,
     private val llmModel: LLModel? = null,
     private val llmParams: LLMParams? = null,
+    private val responseProcessor: ResponseProcessor? = null,
 ) : AIAgentNodeBase<TInput, TOutput>(), ExecutionPointNode {
     override val inputType: KType = start.inputType
     override val outputType: KType = finish.outputType
@@ -166,7 +169,8 @@ public open class AIAgentSubgraph<TInput, TOutput>(
         llm = llm.copy(
             tools = newTools,
             model = llmModel ?: llm.model,
-            prompt = llm.prompt.copy(params = llmParams ?: llm.prompt.params)
+            prompt = llm.prompt.copy(params = llmParams ?: llm.prompt.params),
+            responseProcessor = responseProcessor
         )
     )
 }

agents/agents-core/src/commonMain/kotlin/ai/koog/agents/core/agent/session/AIAgentLLMReadSession.kt

Lines changed: 4 additions & 1 deletion
@@ -5,6 +5,7 @@ import ai.koog.agents.core.tools.ToolDescriptor
 import ai.koog.prompt.dsl.Prompt
 import ai.koog.prompt.executor.model.PromptExecutor
 import ai.koog.prompt.llm.LLModel
+import ai.koog.prompt.processor.ResponseProcessor

 /**
  * Represents a session for interacting with a language model (LLM) in a read-only context within an AI agent setup.
@@ -16,12 +17,14 @@ import ai.koog.prompt.llm.LLModel
  * @param executor The `PromptExecutor` responsible for handling execution of prompts within this session.
  * @param prompt The `Prompt` object specifying the input messages and parameters for the session.
  * @param model The language model instance to be used for processing prompts in this session.
+ * @param responseProcessor The response processor instance to be used for post-processing responses.
  * @param config The configuration settings for the AI agent session.
  */
 public class AIAgentLLMReadSession internal constructor(
     tools: List<ToolDescriptor>,
     executor: PromptExecutor,
     prompt: Prompt,
     model: LLModel,
+    responseProcessor: ResponseProcessor?,
     config: AIAgentConfig,
-) : AIAgentLLMSession(executor, tools, prompt, model, config)
+) : AIAgentLLMSession(executor, tools, prompt, model, responseProcessor, config)
