src/lib/llm-providers.ts: 14 changes (11 additions, 3 deletions)
@@ -20,9 +20,13 @@ function parseOpenAiLine(line: string): string | null {
   if (data === "[DONE]") return null
   try {
     const parsed = JSON.parse(data) as {
-      choices: Array<{ delta: { content?: string } }>
+      choices: Array<{ delta: { content?: string; reasoning_content?: string } }>
     }
-    return parsed.choices?.[0]?.delta?.content ?? null
+    const delta = parsed.choices?.[0]?.delta
+    // Reasoning models (e.g. Nemotron, DeepSeek-R1) emit thinking tokens in
+    // reasoning_content while content is null. Fall back so the accumulated
+    // text includes both the thinking phase and the final answer.
+    return delta?.content ?? delta?.reasoning_content ?? null
   } catch {
     return null
   }
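
To make the fallback concrete, here is a self-contained sketch that runs the same delta extraction over two sample SSE chunks: one from a reasoning model (content is null, the thinking tokens arrive in reasoning_content) and one carrying the final answer. The payload shapes follow the OpenAI streaming format; the helper re-implements the patched logic locally rather than importing parseOpenAiLine, which appears to be module-private.

type Delta = { content?: string | null; reasoning_content?: string | null }

function extractDelta(data: string): string | null {
  if (data === "[DONE]") return null
  try {
    const parsed = JSON.parse(data) as { choices: Array<{ delta: Delta }> }
    const delta = parsed.choices?.[0]?.delta
    // Same fallback as the patch: prefer content, otherwise surface the
    // reasoning_content thinking tokens so nothing is silently dropped.
    return delta?.content ?? delta?.reasoning_content ?? null
  } catch {
    return null
  }
}

// Hypothetical chunks, for illustration only.
const thinkingChunk = JSON.stringify({
  choices: [{ delta: { content: null, reasoning_content: "Scanning the repo layout first..." } }],
})
const answerChunk = JSON.stringify({
  choices: [{ delta: { content: "## Project overview\n" } }],
})

console.log(extractDelta(thinkingChunk)) // "Scanning the repo layout first..."
console.log(extractDelta(answerChunk))   // "## Project overview\n"
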
@@ -64,7 +68,11 @@ function parseGoogleLine(line: string): string | null {
 }
 
 function buildOpenAiBody(messages: ChatMessage[]): Record<string, unknown> {
-  return { messages, stream: true }
+  // max_tokens must be set explicitly: without it many local servers (LM Studio,
+  // llama.cpp) fall back to a small default (often 2,048) that is too short for
+  // multi-file wiki generation. 8,192 matches common local model limits while
+  // leaving ample room for reasoning models that spend tokens on thinking first.
+  return { messages, stream: true, max_tokens: 8192 }
 }
 
 function buildAnthropicBody(messages: ChatMessage[]): Record<string, unknown> {
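
For context on the max_tokens default, a rough sketch of the request this now produces against a local OpenAI-compatible server. The URL is LM Studio's usual default and is only an assumption here; ChatMessage is assumed to be the standard { role, content } pair, and any model field the server may require is left out to mirror buildOpenAiBody.

type ChatMessage = { role: "system" | "user" | "assistant"; content: string }

const messages: ChatMessage[] = [
  { role: "user", content: "Generate the wiki page for src/lib" },
]

// stream: true so tokens arrive as SSE lines; max_tokens: 8192 so the server
// does not silently cap long multi-file wiki output at its small default.
await fetch("http://localhost:1234/v1/chat/completions", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({ messages, stream: true, max_tokens: 8192 }),
})
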