Skip to content

Commit d430aee

Browse files
authored
feat(prompt): Include examples in the prompt with StructuredRequest.Native to help LLMs better understand desired data format (#1396)
In some cases, constraining an LLM to a given data format is not enough: the model can still "get lost" about what content to produce. Providing examples shows the LLM what kind of data we expect in the output, not only which format it must follow. Fixes #1328
1 parent 84990ca commit d430aee

File tree

4 files changed

+57
-26
lines changed

4 files changed

+57
-26
lines changed

prompt/prompt-structure/src/commonMain/kotlin/ai/koog/prompt/structure/PromptExecutorExtensions.kt

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,14 @@ public data class StructuredRequestConfig<T>(
7777

7878
// Rely on built-in model capabilities to provide structured response.
7979
is StructuredRequest.Native -> {
80-
prompt.withUpdatedParams { schema = mode.structure.schema }
80+
prompt(prompt) {
81+
// If examples are supplied, append them
82+
if (mode.structure.examples.isNotEmpty()) {
83+
user {
84+
mode.structure.examples(this)
85+
}
86+
}
87+
}.withUpdatedParams { schema = mode.structure.schema }
8188
}
8289
}
8390
}

prompt/prompt-structure/src/commonMain/kotlin/ai/koog/prompt/structure/Structure.kt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package ai.koog.prompt.structure
22

33
import ai.koog.prompt.params.LLMParams
4+
import ai.koog.prompt.text.TextContentBuilderBase
45

56
/**
67
* Represents a generic structure for handling data with a specific schema.
@@ -34,4 +35,12 @@ public abstract class Structure<TStruct, TSchema : LLMParams.Schema>(
3435
* @return A string representing the pretty-printed version of the input structured data.
3536
*/
3637
public abstract fun pretty(value: TStruct): String
38+
39+
/**
40+
* Defines formatted examples using the provided [TextContentBuilderBase].
41+
*
42+
* @param builder The [TextContentBuilderBase] instance for constructing textual content.
43+
* @return The modified [TextContentBuilderBase] containing formatted examples.
44+
*/
45+
public abstract fun examples(builder: TextContentBuilderBase<*>): TextContentBuilderBase<*>
3746
}

prompt/prompt-structure/src/commonMain/kotlin/ai/koog/prompt/structure/StructuredPrompts.kt

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,7 @@ import ai.koog.prompt.text.TextContentBuilderBase
88
*/
99
public object StructuredOutputPrompts {
1010
/**
11-
* Formats and appends the structured data output to the provided MarkdownContentBuilder.
12-
*
13-
* @param structure The StructuredData instance containing the format ID and definition for the output.
11+
* Formats and appends the structured data output to the provided text builder.
1412
*/
1513
public fun outputInstructionPrompt(builder: TextContentBuilderBase<*>, structure: Structure<*, *>): TextContentBuilderBase<*> = builder.apply {
1614
markdown {
@@ -21,4 +19,30 @@ public object StructuredOutputPrompts {
2119
structure.definition(this)
2220
}
2321
}
22+
23+
/**
24+
* Formats and appends structure examples, if they are present in the provided [structure], to the provided text builder,
25+
* to show the LLM the expected output format. If [Structure.examples] is empty, nothing is appended.
26+
*/
27+
public fun <T> examplesPrompt(builder: TextContentBuilderBase<*>, structure: Structure<T, *>): TextContentBuilderBase<*> = builder.apply {
28+
markdown {
29+
if (structure.examples.isNotEmpty()) {
30+
h4("EXAMPLES")
31+
32+
if (structure.examples.size == 1) {
33+
+"Here is an example of a valid response:"
34+
} else {
35+
+"Here are some examples of valid responses:"
36+
}
37+
38+
structure.examples.forEach { example ->
39+
codeblock(
40+
code = ai.koog.prompt.text.text {
41+
structure(structure, example)
42+
},
43+
)
44+
}
45+
}
46+
}
47+
}
2448
}

prompt/prompt-structure/src/commonMain/kotlin/ai/koog/prompt/structure/json/JsonStructure.kt

Lines changed: 13 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@ package ai.koog.prompt.structure.json
33
import ai.koog.prompt.markdown.markdown
44
import ai.koog.prompt.params.LLMParams
55
import ai.koog.prompt.structure.Structure
6+
import ai.koog.prompt.structure.StructuredOutputPrompts
67
import ai.koog.prompt.structure.json.generator.JsonSchemaGenerator
78
import ai.koog.prompt.structure.json.generator.StandardJsonSchemaGenerator
8-
import ai.koog.prompt.structure.structure
99
import ai.koog.prompt.text.TextContentBuilderBase
1010
import kotlinx.serialization.KSerializer
1111
import kotlinx.serialization.json.ClassDiscriminatorMode
@@ -21,8 +21,10 @@ import kotlinx.serialization.serializer
2121
* @property examples A list of example data items that conform to the structure.
2222
* @property serializer The serializer used to convert the data to and from JSON.
2323
* @property json [kotlinx.serialization.json.Json] instance to perform de/serialization.
24-
* @property definitionPrompt Prompt with definition, explaining the structure to the LLM.
24+
* @param definitionPrompt Prompt with definition, explaining the structure to the LLM.
2525
* Default is [JsonStructure.defaultDefinitionPrompt]
26+
* @param examplesPrompt Prompt with examples of valid formats for the structured data.
27+
* Default is [StructuredOutputPrompts.examplesPrompt]
2628
*/
2729
public class JsonStructure<TStruct>(
2830
id: String,
@@ -32,8 +34,12 @@ public class JsonStructure<TStruct>(
3234
public val json: Json,
3335
private val definitionPrompt: (
3436
builder: TextContentBuilderBase<*>,
35-
structuredData: JsonStructure<TStruct>
36-
) -> TextContentBuilderBase<*> = ::defaultDefinitionPrompt
37+
structure: JsonStructure<TStruct>
38+
) -> TextContentBuilderBase<*> = ::defaultDefinitionPrompt,
39+
private val examplesPrompt: (
40+
builder: TextContentBuilderBase<*>,
41+
structure: JsonStructure<TStruct>,
42+
) -> TextContentBuilderBase<*> = StructuredOutputPrompts::examplesPrompt,
3743
) : Structure<TStruct, LLMParams.Schema.JSON>(id, schema, examples) {
3844

3945
override fun parse(text: String): TStruct = json.decodeFromString(serializer, text.trim().stripMarkdown())
@@ -42,6 +48,8 @@ public class JsonStructure<TStruct>(
4248

4349
override fun definition(builder: TextContentBuilderBase<*>): TextContentBuilderBase<*> = definitionPrompt(builder, this)
4450

51+
override fun examples(builder: TextContentBuilderBase<*>): TextContentBuilderBase<*> = examplesPrompt(builder, this)
52+
4553
// LLMs often enclose JSON output in Markdown code blocks.
4654
// Stripping them saves extra LLM call which would be required to fix the string.
4755
private fun String.stripMarkdown() = when {
@@ -88,24 +96,7 @@ public class JsonStructure<TStruct>(
8896
+json.encodeToString(schema.schema)
8997
br()
9098

91-
if (examples.isNotEmpty()) {
92-
h4("EXAMPLES")
93-
94-
if (examples.size == 1) {
95-
+"Here is an example of a valid response:"
96-
} else {
97-
+"Here are some examples of valid responses:"
98-
}
99-
100-
examples.forEach { example ->
101-
codeblock(
102-
code = ai.koog.prompt.text.text {
103-
structure(this@with, example)
104-
},
105-
language = "json"
106-
)
107-
}
108-
}
99+
StructuredOutputPrompts.examplesPrompt(this, structuredData)
109100

110101
h2("RESULT")
111102
+"Provide ONLY the resulting JSON, WITHOUT ANY free text comments, backticks, or other symbols."

0 commit comments

Comments
 (0)