From bfeabf21946183d08a2f1babd25f82900e07d24a Mon Sep 17 00:00:00 2001
From: Tyler Barnes <tylerdbarnes@gmail.com>
Date: Thu, 25 Jun 2026 21:20:40 -0700
Subject: [PATCH 1/2] feat(core): support inline json prompt injection

Widens the jsonPromptInjection type from boolean to boolean | 'system' | 'inline'. true and 'system' keep the existing behavior of injecting the JSON schema instructions into the leading system message. The 'inline' mode instead appends them to the latest user message (adding a new user message if none exists), which preserves the prompt cache for providers with prefix-based caching. Also adds the feature flag 'json-prompt-injection:inline' for runtime capability detection.

Co-Authored-By: Mastra Code (crof/glm-5.2) <noreply@mastra.ai>
---
 packages/core/src/agent/durable/types.ts      |   2 +-
 packages/core/src/agent/types.ts              |   7 +-
 packages/core/src/agent/utils.test.ts         |  15 ++
 packages/core/src/agent/utils.ts              |  18 ++-
 packages/core/src/features/index.ts           |   1 +
 .../processors/structured-output.ts           |   2 +-
 .../core/src/stream/aisdk/v5/execute.test.ts  | 140 ++++++++++++++++++
 packages/core/src/stream/aisdk/v5/execute.ts  |  61 ++++++--
 8 files changed, 229 insertions(+), 17 deletions(-)
diff --git a/packages/core/src/agent/durable/types.ts b/packages/core/src/agent/durable/types.ts
index 627d21f0687..80f421efb49 100644
--- a/packages/core/src/agent/durable/types.ts
+++ b/packages/core/src/agent/durable/types.ts
@@ -135,7 +135,7 @@ export interface SerializableStructuredOutput {
   /** JSON Schema representation of the output schema */
   schema?: JSONSchema7;
   /** Whether to use JSON prompt injection instead of native response format */
-  jsonPromptInjection?: boolean;
+  jsonPromptInjection?: boolean | 'system' | 'inline';
   /** Whether to use the parent agent's model for structuring */
   useAgent?: boolean;
   /** Model config for a dedicated structuring model (if different from the main model) */
diff --git a/packages/core/src/agent/types.ts b/packages/core/src/agent/types.ts
index e8c48e89ff8..069053e760f 100644
--- a/packages/core/src/agent/types.ts
+++ b/packages/core/src/agent/types.ts
@@ -376,9 +376,12 @@ export type StructuredOutputOptionsBase<OUTPUT = {}> = {
   useAgent?: boolean;
 
   /**
-   * Whether to use system prompt injection instead of native response format to coerce the LLM to respond with json text if the LLM does not natively support structured outputs.
+   * Whether to use prompt injection instead of native response format to coerce the LLM to respond with JSON text.
+   * true and 'system' inject JSON instructions into the leading system message.
+   * 'inline' appends JSON instructions to the latest user message, or adds a new user message when none exists.
+   * false or omitted uses the provider's native response format.
    */
-  jsonPromptInjection?: boolean;
+  jsonPromptInjection?: boolean | 'system' | 'inline';
 
   /**
    * Optional logger instance for structured logging
diff --git a/packages/core/src/agent/utils.test.ts b/packages/core/src/agent/utils.test.ts
index 040b6c65c7c..4a5ca910f4b 100644
--- a/packages/core/src/agent/utils.test.ts
+++ b/packages/core/src/agent/utils.test.ts
@@ -56,6 +56,21 @@ describe('tryGenerateWithJsonFallback', () => {
     expect(generate.mock.calls[1][1].structuredOutput.jsonPromptInjection).toBe(true);
   });
 
+  it('preserves explicit inline jsonPromptInjection on the retry', async () => {
+    const generate = vi
+      .fn()
+      .mockResolvedValueOnce({ object: undefined })
+      .mockResolvedValueOnce({ object: { decision: 'done' } });
+
+    const options = {
+      structuredOutput: { schema: z.object({ decision: z.string() }), jsonPromptInjection: 'inline' },
+    } as any;
+
+    await tryGenerateWithJsonFallback(makeAgent(generate), 'prompt', options);
+
+    expect(generate.mock.calls[1][1].structuredOutput.jsonPromptInjection).toBe('inline');
+  });
+
   it('preserves the rest of the options on the retry', async () => {
     const generate = vi
       .fn()
diff --git a/packages/core/src/agent/utils.ts b/packages/core/src/agent/utils.ts
index 5b8ba9dd09a..824de72069c 100644
--- a/packages/core/src/agent/utils.ts
+++ b/packages/core/src/agent/utils.ts
@@ -57,7 +57,14 @@ export async function tryGenerateWithJsonFallback<OUTPUT>(
     console.warn('Error in tryGenerateWithJsonFallback. Attempting fallback.', error);
     return await agent.generate(prompt, {
       ...options,
-      structuredOutput: { ...options.structuredOutput, jsonPromptInjection: true },
+      structuredOutput: {
+        ...options.structuredOutput,
+        jsonPromptInjection:
+          options.structuredOutput.jsonPromptInjection === 'inline' ||
+          options.structuredOutput.jsonPromptInjection === 'system'
+            ? options.structuredOutput.jsonPromptInjection
+            : true,
+      },
     });
   }
 }
@@ -98,7 +105,14 @@ export async function tryStreamWithJsonFallback<OUTPUT extends {}>(
     console.warn('Error in tryStreamWithJsonFallback. Attempting fallback.', error);
     const result = await agent.stream(prompt, {
       ...streamOptions,
-      structuredOutput: { ...streamOptions.structuredOutput, jsonPromptInjection: true },
+      structuredOutput: {
+        ...streamOptions.structuredOutput,
+        jsonPromptInjection:
+          streamOptions.structuredOutput.jsonPromptInjection === 'inline' ||
+          streamOptions.structuredOutput.jsonPromptInjection === 'system'
+            ? streamOptions.structuredOutput.jsonPromptInjection
+            : true,
+      },
     });
     void onStream?.(result as unknown as Awaited<ReturnType<Agent['stream']>>);
     return result;
diff --git a/packages/core/src/features/index.ts b/packages/core/src/features/index.ts
index 5e39cc63ddd..85b3ab31a4e 100644
--- a/packages/core/src/features/index.ts
+++ b/packages/core/src/features/index.ts
@@ -26,4 +26,5 @@ export const coreFeatures = new Set<string>([
   'deploy-diagnosis',
   'model-inference-span',
   'internal-usage-rollup',
+  'json-prompt-injection:inline',
 ]);
diff --git a/packages/core/src/processors/processors/structured-output.ts b/packages/core/src/processors/processors/structured-output.ts
index 1e4638bb276..e0e6485e5bb 100644
--- a/packages/core/src/processors/processors/structured-output.ts
+++ b/packages/core/src/processors/processors/structured-output.ts
@@ -45,7 +45,7 @@ export class StructuredOutputProcessor<OUTPUT extends {}> implements Processor<'
   private errorStrategy: 'strict' | 'warn' | 'fallback';
   private fallbackValue?: OUTPUT;
   private isStructuringAgentStreamStarted = false;
-  private jsonPromptInjection?: boolean;
+  private jsonPromptInjection?: boolean | 'system' | 'inline';
   private providerOptions?: ProviderOptions;
   private logger?: IMastraLogger;
 
diff --git a/packages/core/src/stream/aisdk/v5/execute.test.ts b/packages/core/src/stream/aisdk/v5/execute.test.ts
index 0b526886e65..e95e7bed154 100644
--- a/packages/core/src/stream/aisdk/v5/execute.test.ts
+++ b/packages/core/src/stream/aisdk/v5/execute.test.ts
@@ -1,6 +1,7 @@
 import { convertArrayToReadableStream, MockLanguageModelV2 } from '@internal/ai-sdk-v5/test';
 import { describe, expect, it } from 'vitest';
 import { z } from 'zod/v4';
+import { coreFeatures } from '../../../features';
 import { execute } from './execute';
 import { testUsage } from './test-utils';
 
@@ -20,6 +21,145 @@ async function readStream(stream: ReadableStream) {
 }
 
 describe('execute structured output prompt handling', () => {
+  it('advertises inline JSON prompt injection support', () => {
+    expect(coreFeatures.has('json-prompt-injection:inline')).toBe(true);
+  });
+
+  it('injects direct structured output schema into the leading system message for boolean and system modes', async () => {
+    const capturedPrompts: unknown[] = [];
+    const model = new MockLanguageModelV2({
+      doStream: async ({ prompt }: any) => {
+        capturedPrompts.push(prompt);
+        return {
+          stream: convertArrayToReadableStream([
+            { type: 'stream-start', warnings: [] },
+            { type: 'response-metadata', id: 'id-system', modelId: 'mock-model-id', timestamp: new Date(0) },
+            { type: 'text-start', id: 'text-1' },
+            { type: 'text-delta', id: 'text-1', delta: '{"suggestions":["ship"]}' },
+            { type: 'text-end', id: 'text-1' },
+            { type: 'finish', finishReason: 'stop', usage: testUsage, providerMetadata: undefined },
+          ]),
+          request: { body: '' },
+          response: { headers: {} },
+          warnings: [] as any[],
+        };
+      },
+    });
+
+    for (const jsonPromptInjection of [true, 'system'] as const) {
+      const stream = execute({
+        runId: `test-run-id-${jsonPromptInjection}`,
+        model: model as any,
+        inputMessages,
+        onResult: () => {},
+        methodType: 'stream',
+        structuredOutput: {
+          schema,
+          jsonPromptInjection,
+        },
+      });
+      await readStream(stream);
+    }
+
+    expect(capturedPrompts).toHaveLength(2);
+    for (const capturedPrompt of capturedPrompts) {
+      expect((capturedPrompt as any[])[0].role).toBe('system');
+      expect(JSON.stringify((capturedPrompt as any[])[0])).toContain('suggestions');
+    }
+  });
+
+  it('injects direct structured output schema into the latest user message for inline mode', async () => {
+    let capturedPrompt: unknown;
+    let capturedResponseFormat: unknown;
+    const model = new MockLanguageModelV2({
+      doStream: async ({ prompt, responseFormat }: any) => {
+        capturedPrompt = prompt;
+        capturedResponseFormat = responseFormat;
+        return {
+          stream: convertArrayToReadableStream([
+            { type: 'stream-start', warnings: [] },
+            { type: 'response-metadata', id: 'id-inline', modelId: 'mock-model-id', timestamp: new Date(0) },
+            { type: 'text-start', id: 'text-1' },
+            { type: 'text-delta', id: 'text-1', delta: '{"suggestions":["ship"]}' },
+            { type: 'text-end', id: 'text-1' },
+            { type: 'finish', finishReason: 'stop', usage: testUsage, providerMetadata: undefined },
+          ]),
+          request: { body: '' },
+          response: { headers: {} },
+          warnings: [] as any[],
+        };
+      },
+    });
+
+    const messages = [
+      { role: 'system' as const, content: 'Keep this prefix stable.' },
+      { role: 'user' as const, content: [{ type: 'text' as const, text: 'First request.' }] },
+      { role: 'assistant' as const, content: [{ type: 'text' as const, text: 'First response.' }] },
+      { role: 'user' as const, content: [{ type: 'text' as const, text: 'Extract now.' }] },
+    ];
+
+    const stream = execute({
+      runId: 'test-run-id-inline',
+      model: model as any,
+      inputMessages: messages,
+      onResult: () => {},
+      methodType: 'stream',
+      structuredOutput: {
+        schema,
+        jsonPromptInjection: 'inline',
+      },
+    });
+
+    await readStream(stream);
+
+    expect(capturedResponseFormat).toBeUndefined();
+    expect((capturedPrompt as any[])[0]).toEqual(messages[0]);
+    expect(JSON.stringify((capturedPrompt as any[])[1])).not.toContain(
+      'Return your response as JSON matching this schema',
+    );
+    expect(JSON.stringify((capturedPrompt as any[])[3])).toContain('Return your response as JSON matching this schema');
+    expect(JSON.stringify((capturedPrompt as any[])[3])).toContain('suggestions');
+  });
+
+  it('adds a user message for inline mode when no user message exists', async () => {
+    let capturedPrompt: unknown;
+    const model = new MockLanguageModelV2({
+      doStream: async ({ prompt }: any) => {
+        capturedPrompt = prompt;
+        return {
+          stream: convertArrayToReadableStream([
+            { type: 'stream-start', warnings: [] },
+            { type: 'response-metadata', id: 'id-inline-no-user', modelId: 'mock-model-id', timestamp: new Date(0) },
+            { type: 'text-start', id: 'text-1' },
+            { type: 'text-delta', id: 'text-1', delta: '{"suggestions":["ship"]}' },
+            { type: 'text-end', id: 'text-1' },
+            { type: 'finish', finishReason: 'stop', usage: testUsage, providerMetadata: undefined },
+          ]),
+          request: { body: '' },
+          response: { headers: {} },
+          warnings: [] as any[],
+        };
+      },
+    });
+
+    const stream = execute({
+      runId: 'test-run-id-inline-no-user',
+      model: model as any,
+      inputMessages: [{ role: 'system' as const, content: 'System only.' }],
+      onResult: () => {},
+      methodType: 'stream',
+      structuredOutput: {
+        schema,
+        jsonPromptInjection: 'inline',
+      },
+    });
+
+    await readStream(stream);
+
+    expect((capturedPrompt as any[])[0]).toEqual({ role: 'system', content: 'System only.' });
+    expect((capturedPrompt as any[])[1].role).toBe('user');
+    expect(JSON.stringify((capturedPrompt as any[])[1])).toContain('Return your response as JSON matching this schema');
+  });
   it('does not inject processor schema instructions into the main prompt when useAgent is enabled', async () => {
     let capturedPrompt: unknown;
     const model = new MockLanguageModelV2({
diff --git a/packages/core/src/stream/aisdk/v5/execute.ts b/packages/core/src/stream/aisdk/v5/execute.ts
index 5369b0cce90..fd61722f7e2 100644
--- a/packages/core/src/stream/aisdk/v5/execute.ts
+++ b/packages/core/src/stream/aisdk/v5/execute.ts
@@ -13,6 +13,41 @@ import { prepareToolsAndToolChoice } from './compat';
 import type { ModelSpecVersion } from './compat';
 import { AISDKV5InputStream } from './input';
 
+function buildJsonInstruction(schema: unknown) {
+  return `Return your response as JSON matching this schema:\n\n${JSON.stringify(schema)}\n\nReturn only valid JSON. Do not include markdown or explanatory text.`;
+}
+
+function injectJsonInstructionIntoLatestUserMessage({
+  messages,
+  schema,
+}: {
+  messages: LanguageModelV2Prompt;
+  schema: unknown;
+}): LanguageModelV2Prompt {
+  const instruction = buildJsonInstruction(schema);
+  const prompt = messages.map(message => ({
+    ...message,
+    content: Array.isArray(message.content) ? [...message.content] : message.content,
+  })) as LanguageModelV2Prompt;
+
+  for (let i = prompt.length - 1; i >= 0; i--) {
+    const message = prompt[i];
+    if (message?.role !== 'user') {
+      continue;
+    }
+
+    message.content = Array.isArray(message.content)
+      ? [...message.content, { type: 'text', text: instruction }]
+      : [
+          { type: 'text', text: String(message.content ?? '') },
+          { type: 'text', text: instruction },
+        ];
+    return prompt;
+  }
+
+  return [...prompt, { role: 'user', content: [{ type: 'text', text: instruction }] }] as LanguageModelV2Prompt;
+}
+
 function omit<T extends object, K extends keyof T>(obj: T, keys: K[]): Omit<T, K> {
   const newObj = { ...obj };
   for (const key of keys) {
@@ -99,13 +134,21 @@ export function execute<OUTPUT = undefined>({
     : undefined;
 
   let prompt = inputMessages;
+  const jsonPromptInjection = structuredOutput?.jsonPromptInjection;
+  const injectionMode = jsonPromptInjection === true ? 'system' : jsonPromptInjection;
 
   // For direct mode (no model provided for structuring agent), inject JSON schema instruction if opting out of native response format with jsonPromptInjection
-  if (structuredOutputMode === 'direct' && responseFormat?.type === 'json' && structuredOutput?.jsonPromptInjection) {
-    prompt = injectJsonInstructionIntoMessages({
-      messages: inputMessages,
-      schema: responseFormat.schema,
-    });
+  if (structuredOutputMode === 'direct' && responseFormat?.type === 'json' && injectionMode) {
+    prompt =
+      injectionMode === 'inline'
+        ? injectJsonInstructionIntoLatestUserMessage({
+            messages: inputMessages,
+            schema: responseFormat.schema,
+          })
+        : injectJsonInstructionIntoMessages({
+            messages: inputMessages,
+            schema: responseFormat.schema,
+          });
   }
 
   // For processor mode without agent reuse, inject a custom prompt to inform the main agent
@@ -130,8 +173,7 @@ export function execute<OUTPUT = undefined>({
    * @see https://platform.openai.com/docs/guides/structured-outputs#structured-outputs-vs-json-mode
    * @see https://ai-sdk.dev/docs/ai-sdk-core/generating-structured-data#accessing-reasoning
    */
-  const isOpenAIStrictMode =
-    model.provider.startsWith('openai') && responseFormat?.type === 'json' && !structuredOutput?.jsonPromptInjection;
+  const isOpenAIStrictMode = model.provider.startsWith('openai') && responseFormat?.type === 'json' && !injectionMode;
 
   // For OpenAI strict mode, ensure all properties are required and additionalProperties: false
   if (isOpenAIStrictMode && responseFormat?.schema) {
@@ -169,10 +211,7 @@ export function execute<OUTPUT = undefined>({
               providerOptions: providerOptionsToUse,
               abortSignal,
               includeRawChunks,
-              responseFormat:
-                structuredOutputMode === 'direct' && !structuredOutput?.jsonPromptInjection
-                  ? responseFormat
-                  : undefined,
+              responseFormat: structuredOutputMode === 'direct' && !injectionMode ? responseFormat : undefined,
               ...filteredModelSettings,
               headers,
             });

From 0ede67b8b8db4e9134a08fe645a782457453af37 Mon Sep 17 00:00:00 2001
From: Tyler Barnes <tylerdbarnes@gmail.com>
Date: Thu, 25 Jun 2026 22:23:11 -0700
Subject: [PATCH 2/2] chore: add changeset for inline json prompt injection

Co-Authored-By: Mastra Code (crof/glm-5.2) <noreply@mastra.ai>
---
 .changeset/quick-dingos-eat.md | 13 +++++++++++++
 1 file changed, 13 insertions(+)
 create mode 100644 .changeset/quick-dingos-eat.md

diff --git a/.changeset/quick-dingos-eat.md b/.changeset/quick-dingos-eat.md
new file mode 100644
index 00000000000..a740e519bca
--- /dev/null
+++ b/.changeset/quick-dingos-eat.md
@@ -0,0 +1,13 @@
+---
+'@mastra/core': minor
+---
+
+support inline JSON prompt injection
+
+Widens the `jsonPromptInjection` type from `boolean` to
+`boolean | 'system' | 'inline'`. `true` and `'system'` keep the existing
+behavior of injecting the JSON schema instruction into the leading system
+message. `'inline'` appends it to the latest user message instead (adding a
+user message if none exists), which preserves the prompt cache on providers
+with prefix-based caching. Also adds a `'json-prompt-injection:inline'`
+feature flag for runtime capability detection.