Skip to content

Commit 89ca01a

Browse files
committed
schemaTokenEstimate: size string budgets from .max(), raise ceiling to 32k
1 parent 2d4470d commit 89ca01a

2 files changed

Lines changed: 68 additions & 4 deletions

File tree

src/api/runtime/__tests__/schemaTokenEstimate.test.ts

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,15 +44,35 @@ describe('estimateMaxTokensForZodSchema', () => {
4444
expect(nestedBudget).toBeGreaterThanOrEqual(1024);
4545
});
4646

47-
it('clamps to the maximum (8192) for very large schemas', () => {
47+
it('sizes the budget from a string field\'s .max() so large-output schemas do not truncate', () => {
  // Regression guard. z.string() used to count as a flat ~30-token leaf no
  // matter what .max() said, so a codegen-style `source: z.string().max(80_000)`
  // field (roughly 20K tokens of output) was auto-estimated at the 512 floor
  // and the provider cut the JSON off mid-string. The estimate has to grow
  // with the declared maximum length.
  const estimateFor = (sourceMaxChars: number) =>
    estimateMaxTokensForZodSchema(
      z.object({
        source: z.string().max(sourceMaxChars),
        rationale: z.string().max(500),
      }),
    );

  const largeBudget = estimateFor(80_000);
  const modestBudget = estimateFor(2_000);

  // A larger declared max must yield a strictly larger budget...
  expect(largeBudget).toBeGreaterThan(modestBudget);
  // ...and the 80K-char field must be budgeted well above the old ceiling.
  expect(largeBudget).toBeGreaterThanOrEqual(16_000);
});
66+
67+
it('clamps to the maximum (32000) for very large schemas', () => {
4868
// Build a wide schema with many fields and deeply nested arrays so the
4969
// raw walk well exceeds the 32000 cap.
5070
const wide = z.object({
5171
a: z.array(z.array(z.array(z.object({ x: z.string(), y: z.string(), z: z.string() })))),
5272
b: z.array(z.array(z.array(z.object({ x: z.string(), y: z.string(), z: z.string() })))),
5373
c: z.array(z.array(z.array(z.object({ x: z.string(), y: z.string(), z: z.string() })))),
5474
});
55-
expect(estimateMaxTokensForZodSchema(wide)).toBe(8192);
75+
expect(estimateMaxTokensForZodSchema(wide)).toBe(32000);
5676
});
5777

5878
it('unwraps optional / nullable / default wrappers', () => {
@@ -107,6 +127,6 @@ describe('estimateMaxTokensForZodSchema', () => {
107127
expect(() => estimateMaxTokensForZodSchema(cyclic)).not.toThrow();
108128
const tokens = estimateMaxTokensForZodSchema(cyclic);
109129
expect(tokens).toBeGreaterThanOrEqual(512);
110-
expect(tokens).toBeLessThanOrEqual(8192);
130+
expect(tokens).toBeLessThanOrEqual(32000);
111131
});
112132
});

src/api/runtime/schemaTokenEstimate.ts

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,12 @@
1616
const TOKENS_PER_LEAF = 30; // average tokens per primitive field
1717
const TOKENS_PER_ARRAY_ITEM = 60; // assumed per-element budget for typical strings
1818
const MIN_BUDGET = 512;
19-
const MAX_BUDGET = 8192;
19+
// 2026-05-30: raised 8192 -> 32000. The old 8192 ceiling truncated large-output
20+
// schemas (e.g. a `source: z.string().max(80_000)` codegen field) even when the
21+
// model supports far more, which forced every such caller to pass maxTokens by
22+
// hand. Sonnet 4.x supports ~64K output and Opus 4.8 128K, so 32000 is a safe
23+
// auto-estimate ceiling; callers needing more still pass maxTokens explicitly.
24+
const MAX_BUDGET = 32000;
2025
const HEADROOM = 1.5; // 50% headroom for prose-heavy fields
2126
const MAX_DEPTH = 8; // recursion safety on cyclic-ish schemas
2227

@@ -112,7 +117,46 @@ function walk(node: unknown, depth: number): number {
112117
: TOKENS_PER_LEAF;
113118
}
114119

120+
case 'ZodString': {
121+
const maxChars = stringMaxChars(def);
122+
// A bounded string contributes ~maxChars/4 tokens (~4 chars/token); the
123+
// global HEADROOM multiplier covers denser code / JSON-escaped text.
124+
// Unbounded strings keep the small leaf default — the caller should add
125+
// .max() or pass an explicit maxTokens for large free-form output.
126+
return maxChars && maxChars > 0 ? Math.ceil(maxChars / 4) : TOKENS_PER_LEAF;
127+
}
128+
115129
default:
116130
return TOKENS_PER_LEAF;
117131
}
118132
}
133+
134+
/**
 * Extracts the effective upper bound, in characters, that a Zod string
 * schema's checks place on its value. Works across the Zod v3 and v4
 * internal shapes.
 *
 * Zod keeps every check that was chained onto a schema, so a string may carry
 * several length constraints (e.g. `.max(80_000).max(100)`). All of them must
 * hold, so the effective bound is the smallest one; this function returns
 * that minimum rather than the first match.
 *
 * Recognised check shapes:
 * - Zod v3: entries of `_def.checks` such as `{ kind: 'max', value: N }` or
 *   `{ kind: 'length', value: N }` (exact length).
 * - Zod v4: the constraint nests under `_zod.def` (or `def`) with
 *   `check: 'max_length'` and a numeric `maximum`, or
 *   `check: 'length_equals'` and a numeric `length`.
 *
 * @param def - The string schema's internal definition object; only its
 *   `checks` property is read.
 * @returns The tightest character bound found, or `null` when `checks` is not
 *   an array or contains no recognised upper-bound check (i.e. the string is
 *   unbounded as far as this function can tell).
 */
function stringMaxChars(def: Record<string, unknown>): number | null {
  const checks = def.checks;
  if (!Array.isArray(checks)) return null;

  let tightest: number | null = null;
  for (const entry of checks) {
    const bound = upperBoundFromCheck(entry);
    // NaN can never be a meaningful limit and would poison the comparison.
    if (bound === null || Number.isNaN(bound)) continue;
    if (tightest === null || bound < tightest) tightest = bound;
  }
  return tightest;
}

/**
 * Reads the upper character bound expressed by a single Zod check entry, or
 * returns `null` when the entry is not an upper-bound check.
 */
function upperBoundFromCheck(entry: unknown): number | null {
  if (typeof entry !== 'object' || entry === null) return null;
  const check = entry as Record<string, unknown>;

  // Zod v3: flat `{ kind, value }` records. An exact `.length(N)` is also an
  // upper bound of N characters.
  if ((check.kind === 'max' || check.kind === 'length') && typeof check.value === 'number') {
    return check.value;
  }

  // Zod v4: the definition lives under `_zod.def`; fall back to `def`, then to
  // the entry itself, so alternative layouts are still inspected.
  const inner = ((check._zod as Record<string, unknown> | undefined)?.def
    ?? (check.def as Record<string, unknown> | undefined)
    ?? check) as Record<string, unknown>;

  if (
    (inner.check === 'max_length' || inner.check === 'max')
    && typeof inner.maximum === 'number'
  ) {
    return inner.maximum;
  }
  if (inner.check === 'length_equals' && typeof inner.length === 'number') {
    return inner.length;
  }
  return null;
}

0 commit comments

Comments
 (0)