Skip to content

Commit ee55d4d

Browse files
Add basic WebMCP zaMaker! Evals
1 parent 611a2b1 commit ee55d4d

File tree

3 files changed

+197
-10
lines changed

3 files changed

+197
-10
lines changed

demos/pizza-maker/script.js

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ function toggleLayer(layerId, action = 'toggle') {
5151
}
5252
}
5353

54-
function addTopping(emoji, size = 'Medium', count = 5) {
54+
function addTopping(emoji, size, count) {
5555
for (let i = 0; i < count; i++) {
5656
const topping = document.createElement('div');
5757
topping.className = 'topping';
@@ -173,20 +173,38 @@ if (window.navigator.modelContext) {
173173

174174
navigator.modelContext.registerTool({
175175
name: 'set_pizza_size',
176-
description: 'Set the size of the pizza',
176+
description: 'Set the pizza size directly or infer it based on the number of people.',
177177
inputSchema: {
178178
type: 'object',
179179
properties: {
180-
size: { type: 'string', enum: ['Small', 'Medium', 'Large', 'Extra Large'] },
180+
size: {
181+
type: 'string',
182+
enum: ['Small', 'Medium', 'Large', 'Extra Large'],
183+
description: 'The specific size name.',
184+
},
185+
number_of_persons: {
186+
type: 'number',
187+
description: 'The number of people eating to help infer the correct size.',
188+
},
181189
},
182-
required: ['size'],
183190
},
184-
execute: ({ size }) => {
185-
if (sizes[size]) {
186-
changeSize(sizes[size], size);
187-
return `Changed pizza size to ${size}`;
191+
execute: ({ size, number_of_persons }) => {
192+
let finalSize = size;
193+
194+
// Logic to infer size if only number_of_persons is provided
195+
if (!finalSize && number_of_persons) {
196+
if (number_of_persons <= 2) finalSize = 'Small';
197+
else if (number_of_persons <= 4) finalSize = 'Medium';
198+
else if (number_of_persons <= 6) finalSize = 'Large';
199+
else finalSize = 'Extra Large';
200+
}
201+
202+
if (finalSize && sizes[finalSize]) {
203+
changeSize(sizes[finalSize], finalSize);
204+
return `Set pizza size to ${finalSize}${number_of_persons ? ` for ${number_of_persons} people` : ''}.`;
188205
}
189-
return `Invalid size: ${size}`;
206+
207+
return `Could not determine a valid size. Please specify a size or number of guests.`;
190208
},
191209
});
192210

@@ -245,7 +263,7 @@ if (window.navigator.modelContext) {
245263
},
246264
required: ['topping'],
247265
},
248-
execute: ({ topping, size, count }) => {
266+
execute: ({ topping, size = 'Medium', count = 5 }) => {
249267
addTopping(topping, size, count);
250268
return `Added ${count} ${topping} topping(s)`;
251269
},
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# WebMCP zaMaker! Evals
2+
3+
This directory contains evaluation test cases for the [WebMCP zaMaker!](../../../demos/pizza-maker/) demo.
4+
5+
Note that `schema.json` is not included here because these evaluations are designed to be run against the [live demo](https://googlechromelabs.github.io/webmcp-tools/demos/pizza-maker) directly in the evals-cli UI, which discovers the tool schemas dynamically from the page.
Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
[
2+
{
3+
"messages": [
4+
{
5+
"role": "user",
6+
"type": "message",
7+
"content": "I'd like a small pizza."
8+
}
9+
],
10+
"expectedCall": [
11+
{
12+
"functionName": "set_pizza_size",
13+
"arguments": { "size": "Small" }
14+
}
15+
]
16+
},
17+
{
18+
"messages": [
19+
{
20+
"role": "user",
21+
"type": "message",
22+
"content": "A Pesto pizza please!"
23+
}
24+
],
25+
"expectedCall": [
26+
{
27+
"functionName": "set_pizza_style",
28+
"arguments": { "style": "Pesto" }
29+
}
30+
]
31+
},
32+
{
33+
"messages": [
34+
{
35+
"role": "user",
36+
"type": "message",
37+
"content": "Add extra sauce."
38+
}
39+
],
40+
"expectedCall": [
41+
{
42+
"functionName": "toggle_layer",
43+
"arguments": { "layer": "sauce-layer", "action": "add" }
44+
}
45+
]
46+
},
47+
{
48+
"messages": [
49+
{
50+
"role": "user",
51+
"type": "message",
52+
"content": "Add bell peppers."
53+
}
54+
],
55+
"expectedCall": [
56+
{
57+
"functionName": "add_topping",
58+
"arguments": { "topping": "🫑" }
59+
}
60+
]
61+
},
62+
{
63+
"messages": [
64+
{
65+
"role": "user",
66+
"type": "message",
67+
"content": "I want 10 mushrooms"
68+
}
69+
],
70+
"expectedCall": [
71+
{
72+
"functionName": "add_topping",
73+
"arguments": { "topping": "🍄", "count": 10 }
74+
}
75+
]
76+
},
77+
{
78+
"messages": [
79+
{
80+
"role": "user",
81+
"type": "message",
82+
"content": "We are 10 actually"
83+
}
84+
],
85+
"expectedCall": [
86+
{
87+
"functionName": "set_pizza_size",
88+
"arguments": { "number_of_persons": 10 }
89+
}
90+
]
91+
},
92+
{
93+
"messages": [
94+
{
95+
"role": "user",
96+
"type": "message",
97+
"content": "Remove corn"
98+
}
99+
],
100+
"expectedCall": [
101+
{
102+
"functionName": "remove_topping",
103+
"arguments": { "topping": "🌽" }
104+
}
105+
]
106+
},
107+
{
108+
"messages": [
109+
{
110+
"role": "user",
111+
"type": "message",
112+
"content": "Let's do it over again"
113+
}
114+
],
115+
"expectedCall": [
116+
{
117+
"functionName": "manage_pizza",
118+
"arguments": { "action": "reset" }
119+
}
120+
]
121+
},
122+
{
123+
"messages": [
124+
{
125+
"role": "user",
126+
"type": "message",
127+
"content": "Add one onion"
128+
},
129+
{
130+
"role": "model",
131+
"type": "functioncall",
132+
"name": "add_topping",
133+
"arguments": {
134+
"topping": "🧅",
135+
"count": "1"
136+
}
137+
},
138+
{
139+
"role": "user",
140+
"type": "functionresponse",
141+
"name": "add_topping",
142+
"response": {
143+
"result": "Added 1 🧅 topping(s)."
144+
}
145+
},
146+
{
147+
"role": "model",
148+
"type": "message",
149+
"content": "I've added an onion. Anything else?"
150+
},
151+
{
152+
"role": "user",
153+
"type": "message",
154+
"content": "Remove last topping"
155+
}
156+
],
157+
"expectedCall": [
158+
{
159+
"functionName": "manage_pizza",
160+
"arguments": { "action": "remove_last" }
161+
}
162+
]
163+
}
164+
]

0 commit comments

Comments
 (0)