From bfeabf21946183d08a2f1babd25f82900e07d24a Mon Sep 17 00:00:00 2001 From: Tyler Barnes Date: Thu, 25 Jun 2026 21:20:40 -0700 Subject: [PATCH 1/2] feat(core): support inline json prompt injection Widens the jsonPromptInjection type from boolean to boolean | 'system' | 'inline'. The 'inline' mode injects JSON schema instructions into the latest user message instead of the system prompt, preserving prompt cache for providers with prefix-based caching. Adds feature flag 'json-prompt-injection:inline' for runtime capability detection. Co-Authored-By: Mastra Code (crof/glm-5.2) --- packages/core/src/agent/durable/types.ts | 2 +- packages/core/src/agent/types.ts | 7 +- packages/core/src/agent/utils.test.ts | 15 ++ packages/core/src/agent/utils.ts | 18 ++- packages/core/src/features/index.ts | 1 + .../processors/structured-output.ts | 2 +- .../core/src/stream/aisdk/v5/execute.test.ts | 140 ++++++++++++++++++ packages/core/src/stream/aisdk/v5/execute.ts | 61 ++++++-- 8 files changed, 229 insertions(+), 17 deletions(-) diff --git a/packages/core/src/agent/durable/types.ts b/packages/core/src/agent/durable/types.ts index 627d21f0687..80f421efb49 100644 --- a/packages/core/src/agent/durable/types.ts +++ b/packages/core/src/agent/durable/types.ts @@ -135,7 +135,7 @@ export interface SerializableStructuredOutput { /** JSON Schema representation of the output schema */ schema?: JSONSchema7; /** Whether to use JSON prompt injection instead of native response format */ - jsonPromptInjection?: boolean; + jsonPromptInjection?: boolean | 'system' | 'inline'; /** Whether to use the parent agent's model for structuring */ useAgent?: boolean; /** Model config for a dedicated structuring model (if different from the main model) */ diff --git a/packages/core/src/agent/types.ts b/packages/core/src/agent/types.ts index e8c48e89ff8..069053e760f 100644 --- a/packages/core/src/agent/types.ts +++ b/packages/core/src/agent/types.ts @@ -376,9 +376,12 @@ export type StructuredOutputOptionsBase = { useAgent?: boolean; /** - * Whether to use system prompt injection instead of native response format to coerce the LLM to respond with json text if the LLM does not natively support structured outputs. + * Whether to use prompt injection instead of native response format to coerce the LLM to respond with JSON text. + * true and 'system' inject JSON instructions into the leading system message. + * 'inline' appends JSON instructions to the latest user message. + * false or omitted uses the provider's native response format. */ - jsonPromptInjection?: boolean; + jsonPromptInjection?: boolean | 'system' | 'inline'; /** * Optional logger instance for structured logging diff --git a/packages/core/src/agent/utils.test.ts b/packages/core/src/agent/utils.test.ts index 040b6c65c7c..4a5ca910f4b 100644 --- a/packages/core/src/agent/utils.test.ts +++ b/packages/core/src/agent/utils.test.ts @@ -56,6 +56,21 @@ describe('tryGenerateWithJsonFallback', () => { expect(generate.mock.calls[1][1].structuredOutput.jsonPromptInjection).toBe(true); }); + it('preserves explicit inline jsonPromptInjection on the retry', async () => { + const generate = vi + .fn() + .mockResolvedValueOnce({ object: undefined }) + .mockResolvedValueOnce({ object: { decision: 'done' } }); + + const options = { + structuredOutput: { schema: z.object({ decision: z.string() }), jsonPromptInjection: 'inline' }, + } as any; + + await tryGenerateWithJsonFallback(makeAgent(generate), 'prompt', options); + + expect(generate.mock.calls[1][1].structuredOutput.jsonPromptInjection).toBe('inline'); + }); + it('preserves the rest of the options on the retry', async () => { const generate = vi .fn() diff --git a/packages/core/src/agent/utils.ts b/packages/core/src/agent/utils.ts index 5b8ba9dd09a..824de72069c 100644 --- a/packages/core/src/agent/utils.ts +++ b/packages/core/src/agent/utils.ts @@ -57,7 +57,14 @@ export async function tryGenerateWithJsonFallback( console.warn('Error in tryGenerateWithJsonFallback. Attempting fallback.', error); return await agent.generate(prompt, { ...options, - structuredOutput: { ...options.structuredOutput, jsonPromptInjection: true }, + structuredOutput: { + ...options.structuredOutput, + jsonPromptInjection: + options.structuredOutput.jsonPromptInjection === 'inline' || + options.structuredOutput.jsonPromptInjection === 'system' + ? options.structuredOutput.jsonPromptInjection + : true, + }, }); } } @@ -98,7 +105,14 @@ export async function tryStreamWithJsonFallback( console.warn('Error in tryStreamWithJsonFallback. Attempting fallback.', error); const result = await agent.stream(prompt, { ...streamOptions, - structuredOutput: { ...streamOptions.structuredOutput, jsonPromptInjection: true }, + structuredOutput: { + ...streamOptions.structuredOutput, + jsonPromptInjection: + streamOptions.structuredOutput.jsonPromptInjection === 'inline' || + streamOptions.structuredOutput.jsonPromptInjection === 'system' + ? streamOptions.structuredOutput.jsonPromptInjection + : true, + }, }); void onStream?.(result as unknown as Awaited>); return result; diff --git a/packages/core/src/features/index.ts b/packages/core/src/features/index.ts index 5e39cc63ddd..85b3ab31a4e 100644 --- a/packages/core/src/features/index.ts +++ b/packages/core/src/features/index.ts @@ -26,4 +26,5 @@ export const coreFeatures = new Set([ 'deploy-diagnosis', 'model-inference-span', 'internal-usage-rollup', + 'json-prompt-injection:inline', ]); diff --git a/packages/core/src/processors/processors/structured-output.ts b/packages/core/src/processors/processors/structured-output.ts index 1e4638bb276..e0e6485e5bb 100644 --- a/packages/core/src/processors/processors/structured-output.ts +++ b/packages/core/src/processors/processors/structured-output.ts @@ -45,7 +45,7 @@ export class StructuredOutputProcessor implements Processor<' private errorStrategy: 'strict' | 'warn' | 'fallback'; private fallbackValue?: OUTPUT; private isStructuringAgentStreamStarted = false; - private jsonPromptInjection?: boolean; + private jsonPromptInjection?: boolean | 'system' | 'inline'; private providerOptions?: ProviderOptions; private logger?: IMastraLogger; diff --git a/packages/core/src/stream/aisdk/v5/execute.test.ts b/packages/core/src/stream/aisdk/v5/execute.test.ts index 0b526886e65..e95e7bed154 100644 --- a/packages/core/src/stream/aisdk/v5/execute.test.ts +++ b/packages/core/src/stream/aisdk/v5/execute.test.ts @@ -1,6 +1,7 @@ import { convertArrayToReadableStream, MockLanguageModelV2 } from '@internal/ai-sdk-v5/test'; import { describe, expect, it } from 'vitest'; import { z } from 'zod/v4'; +import { coreFeatures } from '../../../features'; import { execute } from './execute'; import { testUsage } from './test-utils'; @@ -20,6 +21,145 @@ async function readStream(stream: ReadableStream) { } describe('execute structured output prompt handling', () => { + it('advertises inline JSON prompt injection support', () => { + expect(coreFeatures.has('json-prompt-injection:inline')).toBe(true); + }); + + it('injects direct structured output schema into the leading system message for boolean and system modes', async () => { + const capturedPrompts: unknown[] = []; + const model = new MockLanguageModelV2({ + doStream: async ({ prompt }: any) => { + capturedPrompts.push(prompt); + return { + stream: convertArrayToReadableStream([ + { type: 'stream-start', warnings: [] }, + { type: 'response-metadata', id: 'id-system', modelId: 'mock-model-id', timestamp: new Date(0) }, + { type: 'text-start', id: 'text-1' }, + { type: 'text-delta', id: 'text-1', delta: '{"suggestions":["ship"]}' }, + { type: 'text-end', id: 'text-1' }, + { type: 'finish', finishReason: 'stop', usage: testUsage, providerMetadata: undefined }, + ]), + request: { body: '' }, + response: { headers: {} }, + warnings: [] as any[], + }; + }, + }); + + for (const jsonPromptInjection of [true, 'system'] as const) { + const stream = execute({ + runId: `test-run-id-${jsonPromptInjection}`, + model: model as any, + inputMessages, + onResult: () => {}, + methodType: 'stream', + structuredOutput: { + schema, + jsonPromptInjection, + }, + }); + await readStream(stream); + } + + expect(capturedPrompts).toHaveLength(2); + for (const capturedPrompt of capturedPrompts) { + expect((capturedPrompt as any[])[0].role).toBe('system'); + expect(JSON.stringify((capturedPrompt as any[])[0])).toContain('suggestions'); + } + }); + + it('injects direct structured output schema into the latest user message for inline mode', async () => { + let capturedPrompt: unknown; + let capturedResponseFormat: unknown; + const model = new MockLanguageModelV2({ + doStream: async ({ prompt, responseFormat }: any) => { + capturedPrompt = prompt; + capturedResponseFormat = responseFormat; + return { + stream: convertArrayToReadableStream([ + { type: 'stream-start', warnings: [] }, + { type: 'response-metadata', id: 'id-inline', modelId: 'mock-model-id', timestamp: new Date(0) }, + { type: 'text-start', id: 'text-1' }, + { type: 'text-delta', id: 'text-1', delta: '{"suggestions":["ship"]}' }, + { type: 'text-end', id: 'text-1' }, + { type: 'finish', finishReason: 'stop', usage: testUsage, providerMetadata: undefined }, + ]), + request: { body: '' }, + response: { headers: {} }, + warnings: [] as any[], + }; + }, + }); + + const messages = [ + { role: 'system' as const, content: 'Keep this prefix stable.' }, + { role: 'user' as const, content: [{ type: 'text' as const, text: 'First request.' }] }, + { role: 'assistant' as const, content: [{ type: 'text' as const, text: 'First response.' }] }, + { role: 'user' as const, content: [{ type: 'text' as const, text: 'Extract now.' }] }, + ]; + + const stream = execute({ + runId: 'test-run-id-inline', + model: model as any, + inputMessages: messages, + onResult: () => {}, + methodType: 'stream', + structuredOutput: { + schema, + jsonPromptInjection: 'inline', + }, + }); + + await readStream(stream); + + expect(capturedResponseFormat).toBeUndefined(); + expect((capturedPrompt as any[])[0]).toEqual(messages[0]); + expect(JSON.stringify((capturedPrompt as any[])[1])).not.toContain( + 'Return your response as JSON matching this schema', + ); + expect(JSON.stringify((capturedPrompt as any[])[3])).toContain('Return your response as JSON matching this schema'); + expect(JSON.stringify((capturedPrompt as any[])[3])).toContain('suggestions'); + }); + + it('adds a user message for inline mode when no user message exists', async () => { + let capturedPrompt: unknown; + const model = new MockLanguageModelV2({ + doStream: async ({ prompt }: any) => { + capturedPrompt = prompt; + return { + stream: convertArrayToReadableStream([ + { type: 'stream-start', warnings: [] }, + { type: 'response-metadata', id: 'id-inline-no-user', modelId: 'mock-model-id', timestamp: new Date(0) }, + { type: 'text-start', id: 'text-1' }, + { type: 'text-delta', id: 'text-1', delta: '{"suggestions":["ship"]}' }, + { type: 'text-end', id: 'text-1' }, + { type: 'finish', finishReason: 'stop', usage: testUsage, providerMetadata: undefined }, + ]), + request: { body: '' }, + response: { headers: {} }, + warnings: [] as any[], + }; + }, + }); + + const stream = execute({ + runId: 'test-run-id-inline-no-user', + model: model as any, + inputMessages: [{ role: 'system' as const, content: 'System only.' }], + onResult: () => {}, + methodType: 'stream', + structuredOutput: { + schema, + jsonPromptInjection: 'inline', + }, + }); + + await readStream(stream); + + expect((capturedPrompt as any[])[0]).toEqual({ role: 'system', content: 'System only.' }); + expect((capturedPrompt as any[])[1].role).toBe('user'); + expect(JSON.stringify((capturedPrompt as any[])[1])).toContain('Return your response as JSON matching this schema'); + }); it('does not inject processor schema instructions into the main prompt when useAgent is enabled', async () => { let capturedPrompt: unknown; const model = new MockLanguageModelV2({ diff --git a/packages/core/src/stream/aisdk/v5/execute.ts b/packages/core/src/stream/aisdk/v5/execute.ts index 5369b0cce90..fd61722f7e2 100644 --- a/packages/core/src/stream/aisdk/v5/execute.ts +++ b/packages/core/src/stream/aisdk/v5/execute.ts @@ -13,6 +13,41 @@ import { prepareToolsAndToolChoice } from './compat'; import type { ModelSpecVersion } from './compat'; import { AISDKV5InputStream } from './input'; +function buildJsonInstruction(schema: unknown) { + return `Return your response as JSON matching this schema:\n\n${JSON.stringify(schema)}\n\nReturn only valid JSON. Do not include markdown or explanatory text.`; +} + +function injectJsonInstructionIntoLatestUserMessage({ + messages, + schema, +}: { + messages: LanguageModelV2Prompt; + schema: unknown; +}): LanguageModelV2Prompt { + const instruction = buildJsonInstruction(schema); + const prompt = messages.map(message => ({ + ...message, + content: Array.isArray(message.content) ? [...message.content] : message.content, + })) as LanguageModelV2Prompt; + + for (let i = prompt.length - 1; i >= 0; i--) { + const message = prompt[i]; + if (message?.role !== 'user') { + continue; + } + + message.content = Array.isArray(message.content) + ? [...message.content, { type: 'text', text: instruction }] + : [ + { type: 'text', text: String(message.content ?? '') }, + { type: 'text', text: instruction }, + ]; + return prompt; + } + + return [...prompt, { role: 'user', content: [{ type: 'text', text: instruction }] }] as LanguageModelV2Prompt; +} + function omit(obj: T, keys: K[]): Omit { const newObj = { ...obj }; for (const key of keys) { @@ -99,13 +134,21 @@ export function execute({ : undefined; let prompt = inputMessages; + const jsonPromptInjection = structuredOutput?.jsonPromptInjection; + const injectionMode = jsonPromptInjection === true ? 'system' : jsonPromptInjection; // For direct mode (no model provided for structuring agent), inject JSON schema instruction if opting out of native response format with jsonPromptInjection - if (structuredOutputMode === 'direct' && responseFormat?.type === 'json' && structuredOutput?.jsonPromptInjection) { - prompt = injectJsonInstructionIntoMessages({ - messages: inputMessages, - schema: responseFormat.schema, - }); + if (structuredOutputMode === 'direct' && responseFormat?.type === 'json' && injectionMode) { + prompt = + injectionMode === 'inline' + ? injectJsonInstructionIntoLatestUserMessage({ + messages: inputMessages, + schema: responseFormat.schema, + }) + : injectJsonInstructionIntoMessages({ + messages: inputMessages, + schema: responseFormat.schema, + }); } // For processor mode without agent reuse, inject a custom prompt to inform the main agent @@ -130,8 +173,7 @@ export function execute({ * @see https://platform.openai.com/docs/guides/structured-outputs#structured-outputs-vs-json-mode * @see https://ai-sdk.dev/docs/ai-sdk-core/generating-structured-data#accessing-reasoning */ - const isOpenAIStrictMode = - model.provider.startsWith('openai') && responseFormat?.type === 'json' && !structuredOutput?.jsonPromptInjection; + const isOpenAIStrictMode = model.provider.startsWith('openai') && responseFormat?.type === 'json' && !injectionMode; // For OpenAI strict mode, ensure all properties are required and additionalProperties: false if (isOpenAIStrictMode && responseFormat?.schema) { @@ -169,10 +211,7 @@ export function execute({ providerOptions: providerOptionsToUse, abortSignal, includeRawChunks, - responseFormat: - structuredOutputMode === 'direct' && !structuredOutput?.jsonPromptInjection - ? responseFormat - : undefined, + responseFormat: structuredOutputMode === 'direct' && !injectionMode ? responseFormat : undefined, ...filteredModelSettings, headers, }); From 0ede67b8b8db4e9134a08fe645a782457453af37 Mon Sep 17 00:00:00 2001 From: Tyler Barnes Date: Thu, 25 Jun 2026 22:23:11 -0700 Subject: [PATCH 2/2] chore: add changeset for inline json prompt injection Co-Authored-By: Mastra Code (crof/glm-5.2) --- .changeset/quick-dingos-eat.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 .changeset/quick-dingos-eat.md diff --git a/.changeset/quick-dingos-eat.md b/.changeset/quick-dingos-eat.md new file mode 100644 index 00000000000..a740e519bca --- /dev/null +++ b/.changeset/quick-dingos-eat.md @@ -0,0 +1,13 @@ +--- +'@mastra/core': minor +--- + +support inline JSON prompt injection + +Widens the `jsonPromptInjection` type from `boolean` to +`boolean | 'system' | 'inline'`. `'inline'` injects the +JSON schema instruction into the latest user message +instead of the leading system message, preserving prompt +cache on providers with prefix-based caching. Also adds +a `'json-prompt-injection:inline'` feature flag for +runtime capability detection.