Skip to content

Commit 08bbf81

Browse files
committed
Add LLM as a Judge component
1 parent 177b5ee commit 08bbf81

File tree

1 file changed

+111
-0
lines changed
  • agents/agents-ext/src/commonMain/kotlin/ai/koog/agents/ext/agent

1 file changed

+111
-0
lines changed
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
package ai.koog.agents.ext.agent
2+
3+
import ai.koog.agents.core.dsl.builder.AIAgentNodeDelegate
4+
import ai.koog.agents.core.dsl.builder.AIAgentSubgraphBuilderBase
5+
import ai.koog.agents.core.tools.annotations.LLMDescription
6+
import ai.koog.prompt.dsl.prompt
7+
import ai.koog.prompt.executor.clients.openai.OpenAIModels
8+
import ai.koog.prompt.llm.LLModel
9+
import ai.koog.prompt.message.Message
10+
import ai.koog.prompt.structure.StructureFixingParser
11+
import kotlinx.serialization.Serializable
12+
13+
/**
 * Structured verdict that the judging LLM is asked to produce.
 *
 * The [LLMDescription] texts attached to the class and to each property are passed as annotation
 * arguments. Both properties are non-nullable and declare no default value, so a value for
 * each of them is required whenever an instance is constructed.
 *
 * @property isCorrect Whether the evaluated plan was judged to be correct.
 * @property feedback Free-text feedback about the plan. It is described to the LLM as only needed
 * when [isCorrect] is `false` and the plan needs adjustments.
 */
@LLMDescription("Result of the evaluation")
@Serializable
internal data class CriticResultFromLLM(
    @property:LLMDescription("Was the plan correct?")
    val isCorrect: Boolean,
    @property:LLMDescription(
        "Optional feedback about the plan. Only needed if `isCorrect == false` and if plan needs adjustments."
    )
    val feedback: String,
)
24+
25+
/**
 * Outcome of a critique produced by the [llmAsAJudge] node.
 *
 * @property successful `true` when the judge reported the evaluated content as correct.
 * @property feedback Textual feedback returned by the judge alongside its verdict.
 * @property input The input string the judging node received, passed through unchanged.
 */
public data class CriticResult(
    val successful: Boolean,
    val feedback: String,
    val input: String,
)
35+
36+
/**
 * Creates a node that uses a language model (LLM) as a critic or judge of the conversation so far.
 *
 * The node temporarily replaces the session prompt with a dedicated "critic" prompt: [task] becomes
 * the system instruction, and the whole previous message history is flattened into a single user
 * message wrapped in XML-like tags. The LLM is then asked for a structured verdict, which is returned
 * as a [CriticResult] together with the unchanged node input.
 *
 * The session prompt and model that were active before the node ran are restored before the node
 * finishes, including when the structured request fails and its exception propagates out of the node.
 *
 * @param llmModel Optional model used for the judging request only. If `null`, the session's current
 * model is used.
 * @param task The task or instruction presented to the LLM as the system message of the evaluation.
 * @return A node delegate that takes a [String] input and produces a [CriticResult] carrying the
 * verdict, the feedback and that same input.
 */
public fun AIAgentSubgraphBuilderBase<*, *>.llmAsAJudge(
    llmModel: LLModel? = null,
    task: String
): AIAgentNodeDelegate<String, CriticResult> = node<String, CriticResult> { nodeInput ->
    llm.writeSession {
        // Snapshot the session state that is overwritten below so it can be put back afterwards.
        val initialPrompt = prompt.copy()
        val initialModel = model

        try {
            prompt = prompt("critic") {
                // Combine all history into one message with XML tags
                // to prevent LLM from continuing answering in a tool_call -> tool_result pattern
                val combinedMessage = buildString {
                    append("<previous_conversation>\n")
                    initialPrompt.messages.forEach { message ->
                        when (message) {
                            // NOTE(review): system messages are emitted under the <user> tag, exactly like
                            // user messages -- confirm this is intended and not a copy-paste slip.
                            is Message.System -> append("<user>\n${message.content}\n</user>\n")
                            is Message.User -> append("<user>\n${message.content}\n</user>\n")
                            is Message.Assistant -> append("<assistant>\n${message.content}\n</assistant>\n")
                            is Message.Tool.Call -> append(
                                "<tool_call tool=${message.tool}>\n${message.content}\n</tool_call>\n"
                            )

                            is Message.Tool.Result -> append(
                                "<tool_result tool=${message.tool}>\n${message.content}\n</tool_result>\n"
                            )
                        }
                    }
                    append("</previous_conversation>\n")
                }

                // Put Critic Task as a System instruction
                system(task)
                // And rest of the history -- in a combined XML message
                user(combinedMessage)
            }

            if (llmModel != null) {
                model = llmModel
            }

            val result = requestLLMStructured<CriticResultFromLLM>(
                // optional field -- recommended for LLM awareness and reliability of the output
                examples = listOf(
                    CriticResultFromLLM(
                        isCorrect = true,
                        feedback = "All good"
                    ),
                    CriticResultFromLLM(
                        isCorrect = false,
                        feedback = "Following parts of the plan have problems: *, *, *. Please consider changing ..."
                    )
                ),
                // optional field -- recommended for reliability of the format
                // NOTE(review): the fixing model is hard-coded to an OpenAI model regardless of `llmModel`;
                // confirm the executor can always serve it.
                fixingParser = StructureFixingParser(
                    fixingModel = OpenAIModels.CostOptimized.GPT4oMini,
                    retries = 3,
                )
            ).getOrThrow().structure

            CriticResult(
                successful = result.isCorrect,
                feedback = result.feedback,
                input = nodeInput
            )
        } finally {
            // Restore the original prompt and model on every exit path; previously a failure in
            // `getOrThrow()` skipped the restore and left the critic prompt/model set on the session.
            prompt = initialPrompt
            model = initialModel
        }
    }
}

0 commit comments

Comments
 (0)