|
| 1 | +package ai.koog.agents.ext.agent |
| 2 | + |
| 3 | +import ai.koog.agents.core.dsl.builder.AIAgentNodeDelegate |
| 4 | +import ai.koog.agents.core.dsl.builder.AIAgentSubgraphBuilderBase |
| 5 | +import ai.koog.agents.core.tools.annotations.LLMDescription |
| 6 | +import ai.koog.prompt.dsl.prompt |
| 7 | +import ai.koog.prompt.executor.clients.openai.OpenAIModels |
| 8 | +import ai.koog.prompt.llm.LLModel |
| 9 | +import ai.koog.prompt.message.Message |
| 10 | +import ai.koog.prompt.structure.StructureFixingParser |
| 11 | +import kotlinx.serialization.Serializable |
| 12 | + |
/**
 * Structured verdict requested from the LLM when it acts as a critic.
 *
 * The [LLMDescription] annotations carry the human-readable descriptions attached to the class
 * and to each property.
 *
 * @property isCorrect Whether the LLM considered the evaluated plan correct.
 * @property feedback Feedback about the plan. Although the description calls it optional, the
 * property is a non-nullable [String] without a default value.
 */
@Serializable
@LLMDescription("Result of the evaluation")
internal data class CriticResultFromLLM(
    @property:LLMDescription("Was the plan correct?")
    val isCorrect: Boolean,
    @property:LLMDescription(
        "Optional feedback about the plan. Only needed if `isCorrect == false` and if plan needs adjustments."
    )
    val feedback: String,
)
| 24 | + |
/**
 * Outcome of a critique (judging) step.
 *
 * @property successful `true` when the critic judged the evaluated work to be correct.
 * @property feedback Textual feedback produced by the critic.
 * @property input The input the critique step was invoked with, carried along with the verdict.
 */
public data class CriticResult(
    val successful: Boolean,
    val feedback: String,
    val input: String,
)
| 35 | + |
/**
 * Creates a node that uses a language model (LLM) as a critic or judge of the conversation so far.
 *
 * Inside a write session the node temporarily replaces the session prompt with a `"critic"` prompt:
 * [task] becomes the system message, and every message of the previous prompt is flattened into a
 * single XML-tagged user message. A structured [CriticResultFromLLM] answer is then requested and
 * converted into a [CriticResult].
 *
 * The prompt and model that were active before the node ran are always restored, including when the
 * structured request fails; in that case the exception raised by `getOrThrow()` propagates to the
 * caller.
 *
 * @param llmModel The optional language model to use for the critique. If `null`, the model that is
 * currently set on the session is used.
 * @param task The task or instruction presented to the language model as the system message of the
 * critic prompt.
 * @return A node delegate that takes a [String] input and produces a [CriticResult] whose `input`
 * is that same string.
 */
public fun AIAgentSubgraphBuilderBase<*, *>.llmAsAJudge(
    llmModel: LLModel? = null,
    task: String
): AIAgentNodeDelegate<String, CriticResult> = node<String, CriticResult> { nodeInput ->
    llm.writeSession {
        // Snapshot the session state so it can be put back once the critique is done.
        val initialPrompt = prompt.copy()
        val initialModel = model

        try {
            prompt = prompt("critic") {
                // Combine all history into one message with XML tags
                // to prevent LLM from continuing answering in a tool_call -> tool_result pattern
                val combinedMessage = buildString {
                    append("<previous_conversation>\n")
                    initialPrompt.messages.forEach { message ->
                        when (message) {
                            // NOTE(review): system messages are emitted with the <user> tag, same as
                            // user messages -- confirm this is intentional and not a copy-paste slip.
                            is Message.System -> append("<user>\n${message.content}\n</user>\n")
                            is Message.User -> append("<user>\n${message.content}\n</user>\n")
                            is Message.Assistant -> append("<assistant>\n${message.content}\n</assistant>\n")
                            is Message.Tool.Call -> append(
                                "<tool_call tool=${message.tool}>\n${message.content}\n</tool_call>\n"
                            )

                            is Message.Tool.Result -> append(
                                "<tool_result tool=${message.tool}>\n${message.content}\n</tool_result>\n"
                            )
                        }
                    }
                    append("</previous_conversation>\n")
                }

                // Put Critic Task as a System instruction
                system(task)
                // And rest of the history -- in a combined XML message
                user(combinedMessage)
            }

            if (llmModel != null) {
                model = llmModel
            }

            val result = requestLLMStructured<CriticResultFromLLM>(
                // optional field -- recommended for LLM awareness and reliability of the output
                examples = listOf(
                    CriticResultFromLLM(
                        isCorrect = true,
                        feedback = "All good"
                    ),
                    CriticResultFromLLM(
                        isCorrect = false,
                        feedback = "Following parts of the plan have problems: *, *, *. Please consider changing ..."
                    )
                ),
                // optional field -- recommended for reliability of the format
                // NOTE(review): the fixing model is hard-coded to an OpenAI model regardless of
                // `llmModel`; verify this works for setups without an OpenAI client.
                fixingParser = StructureFixingParser(
                    fixingModel = OpenAIModels.CostOptimized.GPT4oMini,
                    retries = 3,
                )
            ).getOrThrow().structure

            CriticResult(
                successful = result.isCorrect,
                feedback = result.feedback,
                input = nodeInput
            )
        } finally {
            // Restore the session even when the structured request throws, so the temporary
            // critic prompt and the overridden model never leak into the rest of the session.
            prompt = initialPrompt
            model = initialModel
        }
    }
}
0 commit comments