Skip to content

Commit 1205850

Browse files
authored
Enable using different LLMs for extract/query (#87)
1 parent 10c6de5 commit 1205850

11 files changed

Lines changed: 217 additions & 36 deletions

File tree

docs/advanced/roles.mdx

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
---
2+
title: LLM Roles
3+
description: "Designate different LLMs for different responsibilities"
4+
icon: venetian-mask
5+
---
6+
7+
You can customize the Magnitude agent to use different LLMs for each of the three primary operations: `act`, `extract`, and `query`.
8+
9+
By default, when a single LLM is provided, all responsibilities will be handled by that LLM. However, by specifying different LLMs for certain roles, you may be able to reduce cost and improve speed.
10+
11+
Example:
12+
```typescript
13+
import { startBrowserAgent } from '../src/agent/browserAgent';
14+
import z from 'zod';
15+
16+
async function main() {
17+
const agent = await startBrowserAgent({
18+
url: 'https://magnitasks.com/tasks',
19+
narrate: true,
20+
llm: [
21+
{
22+
provider: 'claude-code',
23+
options: {
24+
model: 'claude-sonnet-4-20250514'
25+
}
26+
},
27+
{
28+
roles: ['extract'],
29+
provider: 'google-ai',
30+
options: {
31+
model: 'gemini-2.5-flash-lite-preview-06-17',//'gemini-2.5-flash'
32+
}
33+
},
34+
{
35+
roles: ['query'],
36+
provider: 'google-ai',
37+
options: {
38+
// Balance intelligent querying and cheap tokens
39+
model: 'gemini-2.5-flash'
40+
}
41+
}
42+
]
43+
});
44+
45+
const tasks = await agent.extract(
46+
'Extract all tasks in To Do column',
47+
z.array(z.object({ title: z.string(), desc: z.string() }))
48+
); // ^ this will use gemini-2.5-flash-lite-preview-06-17
49+
50+
await agent.act('Move each to in progress', { data: tasks });
51+
// ^ this will use Claude
52+
53+
const numTodosMoved = await agent.query(
54+
'How many todos were moved?',
55+
z.number()
56+
); // ^ this will use gemini-2.5-flash
57+
58+
console.log(numTodosMoved);
59+
60+
await agent.stop();
61+
}
62+
63+
main();
64+
```
65+
66+
One great use case for this is to reduce the cost of extracting data. While `act` requires an intelligent and [visually grounded model](/core-concepts/compatible-llms), `extract` and `query` do not require grounded models, and can often work fine with less intelligent models.
67+
68+
General recommendations:
69+
- `act`: MUST use an [intelligent, visually grounded model](/core-concepts/compatible-llms)
70+
- `extract`: Can use a fast and cheap model, like `gemini-2.5-flash` or even `gemini-2.5-flash-lite`
71+
- `query`: Can use any model that's reasonably intelligent but fast, depending on the complexity of the queries you plan to ask. `gemini-2.5-flash` might be a good option.
72+

docs/core-concepts/query.mdx

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
---
2+
title: Asking Questions
3+
description: "Query the agent about things that happened"
4+
icon: message-circle
5+
---
6+
7+
You can use `agent.query` in order to ask the agent about the actions it just took or anything that happened in the last call to `agent.act`.

docs/docs.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,8 @@
8181
{
8282
"group": "Advanced",
8383
"pages": [
84-
"advanced/memory"
84+
"advanced/memory",
85+
"advanced/roles"
8586
]
8687
},
8788
{

packages/magnitude-core/src/agent/browserAgent.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ export class BrowserAgent extends Agent {
152152
const markdown = serializeToMarkdown(result, markdownOptions);
153153

154154
const screenshot = await this.require(BrowserConnector).getHarness().screenshot();
155-
const data = await this.model.extract(instructions, schema, screenshot, markdown);
155+
const data = await this.models.extract(instructions, schema, screenshot, markdown);
156156

157157
this.browserAgentEvents.emit('extractDone', instructions, data);
158158

packages/magnitude-core/src/agent/index.ts

Lines changed: 29 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,11 @@ import { telemetrifyAgent } from '@/telemetry/events';
1717
import { isClaude } from '@/ai/util';
1818
import { retryOnError } from '@/common';
1919
import { renderContentParts } from '@/memory/rendering';
20+
import { MultiModelHarness } from '@/ai/multiModelHarness';
2021

2122

2223
export interface AgentOptions {
23-
llm?: LLMClient;
24+
llm?: LLMClient | LLMClient[];
2425
connectors?: AgentConnector[];
2526
actions?: ActionDefinition<any>[]; // any additional actions not provided by connectors
2627
prompt?: string | null; // additional agent-level system prompt instructions
@@ -58,7 +59,9 @@ export class Agent {
5859

5960
private memoryOptions: AgentMemoryOptions;
6061

61-
public readonly model: ModelHarness;
62+
public readonly models: MultiModelHarness;
63+
64+
//public readonly model: ModelHarness;
6265
//public readonly micro: GroundingService;
6366
//public readonly events: EventEmitter<AgentEvents>;
6467

@@ -90,15 +93,27 @@ export class Agent {
9093
// TODO: maybe error instead, or automatically differentiate them?
9194
//this.options.actions = Array.from(new Map(aggregatedActions.map(actDef => [actDef.name, actDef])).values());
9295

93-
const doPromptCaching = ('promptCaching' in this.options.llm.options) ? this.options.llm.options.promptCaching : isClaude(this.options.llm) && (this.options.llm.provider === 'anthropic' || this.options.llm.provider === 'claude-code');
94-
//console.log('doPromptCaching?', doPromptCaching)
95-
96-
//promptCaching: doPromptCaching
97-
//if ('promptCaching' in this.options.llm.options) this.options.llm.options.promptCaching = doPromptCaching;
98-
// needs testing
99-
if (this.options.llm.provider === 'anthropic' || this.options.llm.provider === 'claude-code') this.options.llm.options.promptCaching = doPromptCaching;
100-
this.model = new ModelHarness({ llm: this.options.llm });
101-
this.model.events.on('tokensUsed', (usage) => this.events.emit('tokensUsed', usage), this);
96+
const llms = Array.isArray(this.options.llm) ? this.options.llm : [this.options.llm];
97+
98+
let doPromptCaching = false;
99+
for (const client of llms ) {
100+
// If any LLM is prompt-caching compatible, turn on prompt caching overall for memory etc.
101+
if (isClaude(client) && (client.provider === 'anthropic' || client.provider === 'claude-code')) {
102+
// Prompt-caching compatible client
103+
104+
if ('promptCaching' in client.options && client.options.promptCaching !== undefined) {
105+
doPromptCaching = client.options.promptCaching;
106+
} else {
107+
// Default to true if not specified, and override on client config to true
108+
doPromptCaching = true;
109+
client.options.promptCaching = true;
110+
}
111+
}
112+
}
113+
114+
//this.model = new ModelHarness({ llm: this.options.llm });
115+
this.models = new MultiModelHarness(llms);
116+
this.models.events.on('tokensUsed', (usage) => this.events.emit('tokensUsed', usage), this);
102117
this.doneActing = false;
103118

104119
this.memoryOptions = {
@@ -130,7 +145,7 @@ export class Agent {
130145
if (this.options.telemetry) telemetrifyAgent(this);
131146

132147
//console.log('setting up model')
133-
await this.model.setup();
148+
await this.models.setup();
134149
//console.log('done setting up model')
135150

136151
logger.info("Agent: Starting connectors...");
@@ -329,7 +344,7 @@ export class Agent {
329344
const memoryContext = await this._buildContext(memory);
330345
await retryOnError(
331346
async () => {
332-
({ reasoning, actions } = await this.model.createPartialRecipe(
347+
({ reasoning, actions } = await this.models.partialAct(
333348
memoryContext,
334349
description,
335350
dataContentParts,
@@ -395,7 +410,7 @@ export class Agent {
395410
// Record observations in case no act() was used beforehand
396411
await this._recordConnectorObservations(this.latestTaskMemory);
397412
const memoryContext = await this._buildContext(this.memory);//this.memory.buildContext(this.connectors);
398-
return await this.model.query(memoryContext, query, schema);
413+
return await this.models.query(memoryContext, query, schema);
399414
}
400415

401416
async queueDone() {

packages/magnitude-core/src/agent/narrator.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,15 +22,15 @@ export function narrateAgent(agent: Agent) {
2222
});
2323

2424
agent.events.on('start', () => {
25-
console.log(bold(blueBright(`▶ [start] agent started with ${agent.model.describeModel()}`)));
25+
console.log(bold(blueBright(`▶ [start] agent started with ${agent.models.describe()}`)));
2626
});
2727

2828
agent.events.on('stop', () => {
2929
console.log(bold(blueBright(`■ [stop] agent stopped`)));
3030

3131
console.log(` Total usage: ` + bold`${totalInputTokens + totalCachedWriteInputTokens + totalCachedReadInputTokens}` + ` input tokens` + (totalCachedWriteInputTokens > 0 || totalCachedReadInputTokens > 0 ? ` (${totalCachedWriteInputTokens} cache write, ${totalCachedReadInputTokens} cache read)` : '') + ` / ` + bold`${totalOutputTokens}` + ` output tokens`);
3232
if (totalInputTokenCost > 0 || totalOutputTokenCost > 0) {
33-
if (agent.model.describeModel().startsWith('claude-code')) {
33+
if (agent.models.numUniqueModels === 1 && agent.models.describe().startsWith('claude-code')) {
3434
console.log(` Cost: ` + cyanBright`None - using Claude Pro or Max subscription`)
3535
} else {
3636
console.log(` Cost: $${(totalInputTokenCost + totalOutputTokenCost).toFixed(3)}`);

packages/magnitude-core/src/ai/modelHarness.ts

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ export class ModelHarness {
7777
return `${this.options.llm.provider}:${'model' in this.options.llm.options ? this.options.llm.options.model : 'unknown'}`;
7878
}
7979

80-
reportUsage(): void {
80+
private _reportUsage(): void {
8181
// console.log('this.collector.last', this.collector.last)
8282
// if (this.collector.last) console.log("calls:", this.collector.last.calls)//console.log("Response: ", this.collector.last.calls[-1].httpResponse);
8383
//console.log('last call:', this.collector.last?.calls.at(-1)?.httpResponse?.body.json());
@@ -116,7 +116,8 @@ export class ModelHarness {
116116
const knownCostMap: Record<string, { inputTokens: number, outputTokens: number, cacheWriteInputTokens?: number, cacheReadInputTokens?: number }> = {
117117
// TODO: track cached savings on Gemini
118118
'gemini-2.5-pro': { inputTokens: 1.25, outputTokens: 10.0 },
119-
'gemini-2.5-flash': { inputTokens: 0.15, outputTokens: 0.60 },
119+
'gemini-2.5-flash': { inputTokens: 0.30, outputTokens: 2.50 },
120+
'gemini-2.5-flash-lite': { inputTokens: 0.10, outputTokens: 0.40 },
120121
'claude-3.5-sonnet': { inputTokens: 3.00, outputTokens: 15.00, cacheWriteInputTokens: 3.75, cacheReadInputTokens: 0.30 },
121122
'claude-3.7-sonnet': { inputTokens: 3.00, outputTokens: 15.00, cacheWriteInputTokens: 3.75, cacheReadInputTokens: 0.30 },
122123
'claude-sonnet-4': { inputTokens: 3.00, outputTokens: 15.00, cacheWriteInputTokens: 3.75, cacheReadInputTokens: 0.30 },
@@ -172,7 +173,7 @@ export class ModelHarness {
172173
this.prevTotalOutputTokens = outputTokens;
173174
}
174175

175-
async createPartialRecipe<T>(
176+
async partialAct<T>(
176177
context: AgentContext, // Changed to ModularMemoryContext
177178
task: string,
178179
data: MultiMediaContentPart[],
@@ -195,7 +196,7 @@ export class ModelHarness {
195196
this.logger.trace(`createPartialRecipe took ${Date.now()-start}ms`);
196197
// BAML does not carry over action type to @@dynamic of PartialRecipe, so forced cast necessary
197198
//return response as unknown as { actions: z.infer<ActionDefinition<T>['schema']>[] };//, finished: boolean };
198-
this.reportUsage();
199+
this._reportUsage();
199200
return {
200201
reasoning: response.reasoning,//(response.observations ? response.observations + " " : "") + response.meta_reasoning + " " + response.reasoning,
201202
actions: response.actions// as z.infer<ActionDefinition<T>['schema']>[]
@@ -225,7 +226,7 @@ export class ModelHarness {
225226
this.options.llm.provider === 'claude-code',
226227
{ tb }
227228
);
228-
this.reportUsage();
229+
this._reportUsage();
229230

230231
if (schema instanceof z.ZodObject) {
231232
return resp;
@@ -254,7 +255,7 @@ export class ModelHarness {
254255
this.options.llm.provider === 'claude-code',
255256
{ tb }
256257
);
257-
this.reportUsage();
258+
this._reportUsage();
258259

259260
if (schema instanceof z.ZodObject) {
260261
return resp;
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
import { MultiMediaContentPart } from "@/memory/rendering";
2+
import { ModelHarness, ModelHarnessEvents } from "./modelHarness";
3+
import { allBrowserAgentRoles, BrowserAgentRole, LLMClient } from "./types";
4+
import { ActionDefinition } from "@/actions";
5+
import { AgentContext } from "./baml_client";
6+
import { Action } from "@/actions/types";
7+
import { Image } from '@/memory/image';
8+
import EventEmitter from "eventemitter3";
9+
import z from "zod";
10+
11+
12+
export class MultiModelHarness {
13+
/**
14+
* Delegates model responsibilities to different LLMs and consolidates their usage
15+
*/
16+
// Roles may reference the same harness
17+
private roles: Record<BrowserAgentRole, ModelHarness> = {} as Record<BrowserAgentRole, ModelHarness>;
18+
private uniqueModels: ModelHarness[] = [];
19+
20+
public readonly events: EventEmitter<ModelHarnessEvents> = new EventEmitter();
21+
22+
constructor(clients: LLMClient[]) {
23+
// Sort by specificity (from least specific to most specific)
24+
const sortedClients = clients.toSorted((a, b) => (b.roles ? b.roles.length : 9999) - (a.roles ? a.roles.length : 9999));
25+
for (const client of sortedClients) {
26+
const harness = new ModelHarness({ llm: client });
27+
this.uniqueModels.push(harness);
28+
if (client.roles) {
29+
for (const role of client.roles) {
30+
this.roles[role] = harness;
31+
}
32+
} else {
33+
for (const role of allBrowserAgentRoles) {
34+
this.roles[role] = harness;
35+
}
36+
}
37+
38+
// Forward token usage events upward
39+
harness.events.on('tokensUsed', (usage) => { this.events.emit('tokensUsed', usage) }, this);
40+
}
41+
}
42+
43+
async setup() {
44+
await Promise.all(this.uniqueModels.map(model => model.setup()));
45+
}
46+
47+
describe(): string {
48+
// for now - describe least specific model
49+
return this.uniqueModels[0].describeModel();
50+
}
51+
52+
// TODO: generalize responsibility delegation
53+
async partialAct<T>(
54+
context: AgentContext,
55+
task: string,
56+
data: MultiMediaContentPart[],
57+
actionVocabulary: ActionDefinition<T>[]
58+
): Promise<{ reasoning: string, actions: Action[] }> {
59+
return await this.roles['act'].partialAct(context, task, data, actionVocabulary);
60+
}
61+
62+
async extract<T extends z.Schema>(instructions: string, schema: T, screenshot: Image, domContent: string): Promise<z.infer<T>> {
63+
return await this.roles['extract'].extract(instructions, schema, screenshot, domContent);
64+
}
65+
66+
async query<T extends z.Schema>(context: AgentContext, query: string, schema: T): Promise<z.infer<T>> {
67+
return await this.roles['query'].query(context, query, schema);
68+
}
69+
70+
get numUniqueModels() {
71+
return this.uniqueModels.length;
72+
}
73+
}

packages/magnitude-core/src/ai/types.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,12 @@
33
// confidence: number
44
// }
55

6+
export type BrowserAgentRole= 'act' | 'extract' | 'query';
7+
export const allBrowserAgentRoles: BrowserAgentRole[] = ['act', 'extract', 'query'] as const;
8+
69
// Approximately mirrors https://docs.boundaryml.com/ref/llm-client-providers
7-
export type LLMClient = AnthropicClient | ClaudeCodeClient | BedrockClient | GoogleAIClient | GoogleVertexClient | OpenAIClient | OpenAIGenericClient | AzureOpenAIClient;
10+
export type LLMClient = (AnthropicClient | ClaudeCodeClient | BedrockClient | GoogleAIClient | GoogleVertexClient | OpenAIClient | OpenAIGenericClient | AzureOpenAIClient) &
11+
{ roles?: BrowserAgentRole[] };
812
export type GroundingClient = MoondreamClient;
913

1014
export interface AnthropicClient {

packages/magnitude-core/src/ai/util.ts

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -238,28 +238,33 @@ export function buildDefaultBrowserAgentOptions(
238238
//const { llm: envLlm, grounding: envGrounding } = tryDeriveUIGroundedClients();
239239
const envLlm = tryDeriveUIGroundedClient();
240240

241-
let llm: LLMClient | null = agentOptions.llm ?? envLlm;
241+
//let llm: LLMClient | null = agentOptions.llm ?? envLlm;
242+
let llms: LLMClient[] = agentOptions.llm ? (Array.isArray(agentOptions.llm) ? agentOptions.llm : [agentOptions.llm]) : (envLlm ? [envLlm] : []);
242243
const grounding = browserOptions.grounding;//(llm && isGroundedLlm(llm)) ? null : (browserOptions.grounding ?? envGrounding);
243244

244-
if (!llm) {
245+
if (llms.length == 0) {
245246
throw new Error("No LLM configured or available from environment. Set environment variable ANTHROPIC_API_KEY and try again. See https://docs.magnitude.run/customizing/llm-configuration for details");
246247
}
247248
// else if (!isGroundedLlm(llm) && !grounding) {
248249
// throw new Error("Ungrounded LLM is configured without Moondream. Either use Anthropic (set ANTHROPIC_API_KEY) or provide a MOONDREAM_API_KEY");
249250
// }
250251

251252
// Set reasonable temp if not provided
252-
let llmOptions: LLMClient['options'] = { temperature: DEFAULT_BROWSER_AGENT_TEMP, ...(llm?.options ?? {}) };
253-
llm = {...llm, options: llmOptions as any }
254-
255253
let virtualScreenDimensions = null;
256-
if (isClaude(llm)) {
257-
// Claude grounding only really works on 1024x768 screenshots
258-
virtualScreenDimensions = { width: 1024, height: 768 };
254+
for (const llm of llms) {
255+
let llmOptions: LLMClient['options'] = { temperature: DEFAULT_BROWSER_AGENT_TEMP, ...(llm?.options ?? {}) };
256+
//let modifiedLlm = {...llm, options: llmOptions as any }
257+
llm.options = llmOptions;
258+
259+
if (isClaude(llm)) {
260+
// Claude grounding only really works on 1024x768 screenshots
261+
// if any model is claude, use virtual screen dimensions
262+
virtualScreenDimensions = { width: 1024, height: 768 };
263+
}
259264
}
260265

261266
return {
262-
agentOptions: {...agentOptions, llm: llm },
267+
agentOptions: {...agentOptions, llm: llms },
263268
browserOptions: {...browserOptions, grounding: grounding ?? undefined, virtualScreenDimensions: virtualScreenDimensions ?? undefined }
264269
};
265270
}

0 commit comments

Comments
 (0)