Commit 8c0545f

Author: Aegis AI Assistant (committed)

fix: increase global token limits for Tool Use overhead

1 parent: 9fed9d2

1 file changed: 11 additions & 10 deletions

skills/analysis/home-security-benchmark/scripts/run-benchmark.cjs
@@ -309,11 +309,11 @@ async function llmCall(messages, opts = {}) {
   // - Local llama-server: requires 'max_tokens', may not understand 'max_completion_tokens'
   const isCloudApi = !opts.vlm && (LLM_API_TYPE === 'openai' || LLM_BASE_URL.includes('openai.com') || LLM_BASE_URL.includes('api.anthropic'));

-  // No max_tokens for any API — the streaming loop's 2000-token hard cap is the safety net.
-  // Sending max_tokens to thinking models (Qwen3.5) starves actual output since
-  // reasoning_content counts against the limit.
+  // Inject max_tokens to override the server's default limit (typically 2048 for SwiftLM),
+  // which would otherwise prematurely truncate models with extensive reasoning/thinking output.
+  const maxTokensField = isCloudApi ? 'max_completion_tokens' : 'max_tokens';
+  const computedMaxTokens = opts.maxTokens || 4096; // Ensure sufficient headroom for tools/reasoning

-  // Lookup model-family-specific config (e.g. reasoning_effort for Mistral,
   // minTemperature for Nemotron/LFM2).
   // VLM calls skip the LLM family table — VLM models are always local llava-compatible.
   const modelFamily = opts.vlm ? {} : getModelFamily(model || LLM_MODEL);
@@ -341,6 +341,7 @@ async function llmCall(messages, opts = {}) {
     // which activates prefix buffering to strip hallucinated artifacts
     ...(opts.expectJSON && !isCloudApi && { response_format: { type: 'json_object' } }),
     ...(opts.tools && { tools: opts.tools }),
+    [maxTokensField]: computedMaxTokens,
     // Model-family-specific params (e.g. reasoning_effort:'none' for Mistral).
     // These are merged last so they take precedence over defaults.
     ...modelFamilyParams,
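
For context on the two hunks above: the token-limit field name depends on the target API (cloud OpenAI-style endpoints take 'max_completion_tokens', a local llama-server takes 'max_tokens'), and the chosen limit is merged into the request body through a computed property key. Below is a minimal standalone sketch of that pattern, simplified (the VLM check is omitted); the helper name buildRequestBody and its parameters are invented for illustration, not taken from run-benchmark.cjs:

// Sketch only: mirrors the maxTokensField / computedMaxTokens logic introduced above.
// buildRequestBody and its parameter names are hypothetical.
function buildRequestBody({ baseUrl, apiType, messages, maxTokens, tools }) {
  const isCloudApi =
    apiType === 'openai' ||
    baseUrl.includes('openai.com') ||
    baseUrl.includes('api.anthropic');

  // Cloud APIs expect 'max_completion_tokens'; local llama-server wants 'max_tokens'.
  const maxTokensField = isCloudApi ? 'max_completion_tokens' : 'max_tokens';
  // Default to 4096 so tool-call and reasoning output is not cut off by the
  // server's own default completion limit (often around 2048 tokens).
  const computedMaxTokens = maxTokens || 4096;

  return {
    messages,
    ...(tools && { tools }),
    [maxTokensField]: computedMaxTokens, // computed property key, as in the hunk above
  };
}

// A local endpoint gets max_tokens; a cloud endpoint gets max_completion_tokens.
console.log(buildRequestBody({ baseUrl: 'http://localhost:8080/v1', apiType: 'local', messages: [] }));
console.log(buildRequestBody({ baseUrl: 'https://api.openai.com/v1', apiType: 'openai', messages: [] }));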
@@ -483,14 +484,14 @@ async function llmCall(messages, opts = {}) {
         }
       }
       // Hard cap: abort if token count far exceeds maxTokens
-      if (opts.maxTokens && tokenCount > opts.maxTokens * 2) {
-        log(` ⚠ Aborting: ${tokenCount} tokens exceeds ${opts.maxTokens}×2 safety limit`);
+      if (computedMaxTokens && tokenCount > computedMaxTokens * 2) {
+        log(` ⚠ Aborting: ${tokenCount} tokens exceeds ${computedMaxTokens}×2 safety limit`);
         controller.abort();
         break;
       }
-      // Global safety limit: no benchmark test should ever need >2000 tokens
-      if (tokenCount > 2000) {
-        log(` ⚠ Aborting: ${tokenCount} tokens exceeds global 2000-token safety limit`);
+      // Global safety limit: no benchmark test should ever need >8000 tokens
+      if (tokenCount > 8192) {
+        log(` ⚠ Aborting: ${tokenCount} tokens exceeds global 8192-token safety limit`);
         controller.abort();
         break;
       }
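
The hunk above widens the two streaming safety valves: the per-call cap now keys off computedMaxTokens (so it applies even when the caller passes no explicit opts.maxTokens), and the global ceiling moves from 2000 to 8192 tokens to leave room for tool-call and reasoning output. A rough sketch of that decision logic, assuming a hypothetical helper checkTokenBudget and an abort callback standing in for controller.abort():

// Sketch only: the real loop in run-benchmark.cjs streams chunks and counts tokens;
// this helper just reproduces the two-limit check in isolation.
function checkTokenBudget(tokenCount, computedMaxTokens, abort, log = console.log) {
  // Per-call cap: twice the requested limit allows for reasoning overhead.
  if (computedMaxTokens && tokenCount > computedMaxTokens * 2) {
    log(` ⚠ Aborting: ${tokenCount} tokens exceeds ${computedMaxTokens}×2 safety limit`);
    abort();
    return false;
  }
  // Global ceiling: no benchmark test is expected to need more than 8192 tokens.
  if (tokenCount > 8192) {
    log(` ⚠ Aborting: ${tokenCount} tokens exceeds global 8192-token safety limit`);
    abort();
    return false;
  }
  return true;
}

// Even with a generous per-call budget, 9000 streamed tokens trips the global limit.
checkTokenBudget(9000, 16384, () => {});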
@@ -2636,7 +2637,7 @@ async function main() {
   if (TEST_MODE !== 'full') {
     const isVlmSuite = (name) => name.includes('VLM Scene') || name.includes('📸');
     const originalCount = suites.length;
-    if (TEST_MODE === 'llm') {
+    if (TEST_MODE !== 'vlm') {
       // Remove VLM image-analysis suites (VLM-to-Alert Triage stays — it's LLM-based text triage)
       for (let i = suites.length - 1; i >= 0; i--) {
         if (isVlmSuite(suites[i].name)) suites.splice(i, 1);
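
The last hunk broadens the suite filter in main(): previously only TEST_MODE === 'llm' dropped the VLM image-analysis suites, so any other non-full, non-vlm mode would still run them; now every mode except 'vlm' skips them. A small illustrative sketch, where the suite names and the 'tools' mode value are invented for the example (only 'full', 'llm', and 'vlm' are visible in this diff):

// Sketch only: the predicate mirrors isVlmSuite above; filterSuites is hypothetical.
const isVlmSuite = (name) => name.includes('VLM Scene') || name.includes('📸');

function filterSuites(suites, testMode) {
  if (testMode === 'full') return suites; // a full run keeps everything
  const kept = suites.slice();
  if (testMode !== 'vlm') {
    // Any non-VLM mode drops image-analysis suites; text-based triage suites stay.
    for (let i = kept.length - 1; i >= 0; i--) {
      if (isVlmSuite(kept[i].name)) kept.splice(i, 1);
    }
  }
  return kept;
}

const suites = [
  { name: 'VLM Scene Description' },
  { name: 'VLM-to-Alert Triage' },
  { name: 'Tool Use' },
];
// With the old '=== llm' check, a hypothetical 'tools' mode kept the VLM suites; now it drops them.
console.log(filterSuites(suites, 'tools').map((s) => s.name)); // [ 'VLM-to-Alert Triage', 'Tool Use' ]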
